From 96125fd398a5bb988ab7e5e0df669efdec1d3546 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Thu, 19 Oct 2023 14:20:00 +0200 Subject: [PATCH 001/566] Implement tag dispatching prototype #Source commit from RA: aba39a7e22b025299e05302088261ef747ddcd90 # Conflicts: # include/oneapi/dpl/pstl/algorithm_impl.h # include/oneapi/dpl/pstl/execution_impl.h # include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h # include/oneapi/dpl/pstl/iterator_defs.h # include/oneapi/dpl/pstl/utils.h --- include/oneapi/dpl/pstl/algorithm_impl.h | 74 +++++++++++++++++-- include/oneapi/dpl/pstl/execution_impl.h | 54 +++++++++++--- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 18 ++--- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 23 +++++- .../pstl/hetero/dpcpp/execution_sycl_defs.h | 12 +++ include/oneapi/dpl/pstl/iterator_defs.h | 33 ++++----- 6 files changed, 165 insertions(+), 49 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index f4e64d63b4b..6c79ca30b5c 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -134,6 +134,15 @@ __brick_walk1(_DifferenceType __n, _Function __f, ::std::true_type) noexcept oneapi::dpl::__internal::__brick_walk1(__n, __f, ::std::false_type{}); } +template +void +__pattern_walk1(__serial_tag<_ExecutionPolicy, _ForwardIterator> __tag, _ExecutionPolicy&&, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) noexcept +{ + using __tag_type = decltype(__tag); + __internal::__brick_walk1(__first, __last, __f, typename __tag_type::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f, @@ -143,6 +152,45 @@ __pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator _ __internal::__brick_walk1(__first, __last, __f, __is_vector); } +template +void 
+__pattern_walk1(__parallel_forward_tag<_ExecutionPolicy, _ForwardIterator>, _ExecutionPolicy&& __exec, + _ForwardIterator __first, _ForwardIterator __last, _Function __f) +{ + typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; + auto __func = [&__f](_ReferenceType arg) { __f(arg); }; + __internal::__except_handler([&]() { + __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); + }); +} + +template +void +__pattern_walk1(__parallel_tag<_ExecutionPolicy, _ForwardIterator> __tag, _ExecutionPolicy&& __exec, + _ForwardIterator __first, _ForwardIterator __last, _Function __f) +{ + using __tag_type = decltype(__tag); + __internal::__except_handler([&]() { + __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f](_ForwardIterator __i, _ForwardIterator __j) { + __internal::__brick_walk1(__i, __j, __f, typename __tag_type::__is_vector{}); + }); + }); +} + +template +oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +__pattern_replace_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred, const _Tp& __new_value) +{ + oneapi::dpl::__internal::__pattern_walk1( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__replace_functor< + oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, + oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred)); +} + +template template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> @@ -669,13 +717,12 @@ __brick_find_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _Pr [&__pred](_RandomAccessIterator __it, _SizeType __i) { return __pred(__it[__i]); }); } -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_find_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, - _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_find_if(_Tag __tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Predicate __pred) noexcept { - return __internal::__brick_find_if(__first, __last, __pred, __is_vector); + return __internal::__brick_find_if(__first, __last, __pred, typename _Tag::__is_vector{}); } template @@ -693,6 +740,21 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran }); } +template +_ForwardIterator +__pattern_find_if(__parallel_tag<_ExecutionPolicy, _ForwardIterator> __tag, _ExecutionPolicy&& __exec, + _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) +{ + using __tag_type = decltype(__tag); + return __except_handler([&]() { + return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_ForwardIterator __i, _ForwardIterator __j) { + return __brick_find_if(__i, __j, __pred, typename __tag_type::__is_vector{}); + }, + ::std::true_type{}); + }); +} + //------------------------------------------------------------------------ // find_end //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index fb9d9682103..fc7d6bec1cd 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -127,22 +127,56 @@ __is_parallelization_preferred(_ExecutionPolicy& __exec) __internal::__is_random_access_iterator_t<_IteratorTypes...>()); } -template -struct __prefer_unsequenced_tag +//------------------------------------------------------------------------ +// backend selector with tags 
+//------------------------------------------------------------------------ + +template +struct __vectorable_tag +{ + using __is_vector = __conjunction<__allow_unsequenced<_Policy>, + typename __internal::__is_random_access_iterator<_IteratorTypes...>>; +}; + +template +struct __serial_tag : __vectorable_tag<_Policy, _IteratorTypes...> +{ +}; + +template +struct __parallel_tag : __vectorable_tag<_Policy, _IteratorTypes...> { - static constexpr bool value = - __internal::__allow_unsequenced::value && __internal::__is_random_access_iterator_v<_IteratorTypes...>; - typedef ::std::integral_constant type; }; -template -struct __prefer_parallel_tag +template +struct __parallel_forward_tag : __vectorable_tag<_Policy, _IteratorTypes...> { - static constexpr bool value = - __internal::__allow_parallel::value && __internal::__is_random_access_iterator_v<_IteratorTypes...>; - typedef ::std::integral_constant type; }; +template +using __tag_type = + typename ::std::conditional<__internal::__is_random_access_iterator<_IteratorTypes...>::value, + __parallel_tag<_Policy, _IteratorTypes...>, + typename ::std::conditional<__is_forward_iterator<_IteratorTypes...>::value, + __parallel_forward_tag<_Policy, _IteratorTypes...>, + __serial_tag<_Policy, _IteratorTypes...>>::type>::type; + +template +typename ::std::enable_if::value, + __serial_tag<_Policy, typename ::std::decay<_IteratorTypes>::type...>>::type +__select_backend(_Policy&&, _IteratorTypes&&...) +{ + return {}; +} + +template +typename ::std::enable_if<__allow_parallel<_Policy>::value, + __tag_type<_Policy, typename ::std::decay<_IteratorTypes>::type...>>::type +__select_backend(_Policy&&, _IteratorTypes&&...) 
+{ + return {}; +} + } // namespace __internal } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 37d6f91a2a6..9c2186d6bda 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -97,10 +97,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_find_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(std::forward<_ExecutionPolicy>(__exec), __first); + + return oneapi::dpl::__internal::__pattern_find_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } template @@ -363,13 +363,9 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { - oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__replace_functor< - oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, - oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + auto __dispatch_tag = 
oneapi::dpl::__internal::__select_backend(::std::forward<_ExecutionPolicy>(__exec), __first); + __pattern_replace_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, + __new_value); } template diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 87118eb27ce..4a3c9ce13b8 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -58,6 +58,24 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte .wait(); } +template +void +__pattern_walk1(__offload_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) +{ + auto __n = __last - __first; + if (__n <= 0) + return; + + auto __keep = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); + auto __buf = __keep(__first, __last); + + oneapi::dpl::__par_backend_hetero::__parallel_for(__exec, unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, + __n, __buf.all_view()) + .wait(); +} + //------------------------------------------------------------------------ // walk1_n //------------------------------------------------------------------------ @@ -639,9 +657,8 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las //------------------------------------------------------------------------ template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_find_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +_Iterator +__pattern_find_if(__offload_tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred) { if (__first == __last) return __last; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h 
b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index b70e5518ff6..e8567f05442 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -18,6 +18,7 @@ #include "../../onedpl_config.h" #include "../../execution_defs.h" +#include "../../iterator_defs.h" #include "sycl_defs.h" @@ -312,6 +313,17 @@ using __enable_if_device_execution_policy_double_no_default = oneapi::dpl::__internal::__is_convertible_to_event<_Events...>, _T>; +struct __offload_tag +{ +}; + +template +typename ::std::enable_if<__is_random_access_iterator<_IteratorTypes...>::value, __offload_tag>::type +__select_backend(const execution::device_policy<_PolicyParams...>&, _IteratorTypes&&...) +{ + return {}; +} + } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index 50c1f9a6507..f0aac046362 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -50,30 +50,25 @@ struct __iterator_traits<_Tp*, void> : ::std::iterator_traits<_Tp*> { }; -// Make is_random_access_iterator not to fail with a 'hard' error when it's used in SFINAE with -// a non-iterator type by providing a default value. -template -struct __is_random_access_iterator_impl : ::std::false_type -{ -}; +// Make is_random_access_iterator and is_forward_iterator not to fail with a 'hard' error when it's used in +//SFINAE with a non-iterator type by providing a default value. +template +auto +__is_needed_iter(int) + -> decltype(__conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); -template -struct __is_random_access_iterator_impl<_IteratorType, - __void_type::iterator_category>> - : ::std::is_same::iterator_category, ::std::random_access_iterator_tag> -{ -}; +template +auto +__is_needed_iter(...) 
-> ::std::false_type; -/* iterator */ -template -struct __is_random_access_iterator - : ::std::conditional_t<__is_random_access_iterator_impl<_IteratorType>::value, - __is_random_access_iterator<_OtherIteratorTypes...>, ::std::false_type> +template +struct __is_random_access_iterator : decltype(__is_needed_iter<::std::random_access_iterator_tag, _IteratorTypes...>(0)) { }; -template -struct __is_random_access_iterator<_IteratorType> : __is_random_access_iterator_impl<_IteratorType> +template +struct __is_forward_iterator : decltype(__is_needed_iter<::std::forward_iterator_tag, _IteratorTypes...>(0)) { }; From 185023f5a741f962c7f17680401a3882bd2c3e63 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Thu, 19 Oct 2023 14:25:22 +0200 Subject: [PATCH 002/566] Replace struct __conjunction -> std::conjunction --- include/oneapi/dpl/pstl/execution_impl.h | 4 ++-- include/oneapi/dpl/pstl/iterator_defs.h | 4 ++-- include/oneapi/dpl/pstl/tuple_impl.h | 4 ++-- include/oneapi/dpl/pstl/utils.h | 15 --------------- 4 files changed, 6 insertions(+), 21 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index fc7d6bec1cd..e75f7e6945d 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -134,8 +134,8 @@ __is_parallelization_preferred(_ExecutionPolicy& __exec) template struct __vectorable_tag { - using __is_vector = __conjunction<__allow_unsequenced<_Policy>, - typename __internal::__is_random_access_iterator<_IteratorTypes...>>; + using __is_vector = std::conjunction<__allow_unsequenced<_Policy>, + typename __internal::__is_random_access_iterator<_IteratorTypes...>>; }; template diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index f0aac046362..210b7e0ca82 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -55,8 +55,8 @@ struct __iterator_traits<_Tp*, void> : 
::std::iterator_traits<_Tp*> template auto __is_needed_iter(int) - -> decltype(__conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); + -> decltype(std::conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); template auto diff --git a/include/oneapi/dpl/pstl/tuple_impl.h b/include/oneapi/dpl/pstl/tuple_impl.h index b3e373a58fe..e5dc3bdf859 100644 --- a/include/oneapi/dpl/pstl/tuple_impl.h +++ b/include/oneapi/dpl/pstl/tuple_impl.h @@ -341,8 +341,8 @@ struct tuple template , - ::std::is_constructible...>::value)>> + std::conjunction<::std::is_constructible, + ::std::is_constructible...>::value)>> tuple(_U1&& _value, _U&&... _next) : holder(::std::forward<_U1>(_value)), next(::std::forward<_U>(_next)...) { } diff --git a/include/oneapi/dpl/pstl/utils.h b/include/oneapi/dpl/pstl/utils.h index dec07f676a0..2f7edb08d31 100644 --- a/include/oneapi/dpl/pstl/utils.h +++ b/include/oneapi/dpl/pstl/utils.h @@ -553,21 +553,6 @@ struct __next_to_last template class __future; -template -struct __conjunction : ::std::true_type -{ -}; - -template -struct __conjunction<_B1> : _B1 -{ -}; - -template -struct __conjunction<_B1, _Bs...> : ::std::conditional_t> -{ -}; - // empty base class for type erasure struct __lifetime_keeper_base { From 81e802eabe926042a903218134b1f19a89931174 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 20 Oct 2023 10:29:53 +0200 Subject: [PATCH 003/566] Support FPGA backend. 
Adopt offload tags (cherry picked from commit 42d44064d9a9102ac52c38d25ab81374ea864d25) # Conflicts: # include/oneapi/dpl/pstl/algorithm_impl.h # include/oneapi/dpl/pstl/execution_impl.h # include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h # include/oneapi/dpl/pstl/iterator_defs.h # include/oneapi/dpl/pstl/parallel_backend_tbb.h --- include/oneapi/dpl/pstl/algorithm_impl.h | 32 ++++----- include/oneapi/dpl/pstl/execution_impl.h | 67 ++++++++++++------- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 4 +- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 12 ++-- .../pstl/hetero/dpcpp/execution_sycl_defs.h | 29 ++++++-- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 41 ++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 44 ++++++++++++ include/oneapi/dpl/pstl/iterator_defs.h | 10 +-- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 18 +++++ include/oneapi/dpl/pstl/parallel_impl.h | 4 +- 10 files changed, 201 insertions(+), 60 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 6c79ca30b5c..2d1f115a76e 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -134,13 +134,12 @@ __brick_walk1(_DifferenceType __n, _Function __f, ::std::true_type) noexcept oneapi::dpl::__internal::__brick_walk1(__n, __f, ::std::false_type{}); } -template +template void -__pattern_walk1(__serial_tag<_ExecutionPolicy, _ForwardIterator> __tag, _ExecutionPolicy&&, _ForwardIterator __first, +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f) noexcept { - using __tag_type = decltype(__tag); - __internal::__brick_walk1(__first, __last, __f, typename __tag_type::__is_vector{}); + __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{}); } template @@ -154,26 +153,26 @@ __pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator _ template void 
-__pattern_walk1(__parallel_forward_tag<_ExecutionPolicy, _ForwardIterator>, _ExecutionPolicy&& __exec, +__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) { typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; auto __func = [&__f](_ReferenceType arg) { __f(arg); }; __internal::__except_handler([&]() { - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); + __par_backend::__parallel_for_each(__parallel_forward_tag::__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); }); } -template +template void -__pattern_walk1(__parallel_tag<_ExecutionPolicy, _ForwardIterator> __tag, _ExecutionPolicy&& __exec, +__pattern_walk1(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) { - using __tag_type = decltype(__tag); + using __backend_tag = typename decltype(__tag)::__backend_tag; __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__f](_ForwardIterator __i, _ForwardIterator __j) { - __internal::__brick_walk1(__i, __j, __f, typename __tag_type::__is_vector{}); + __internal::__brick_walk1(__i, __j, __f, _IsVector{}); }); }); } @@ -725,6 +724,7 @@ __pattern_find_if(_Tag __tag, _ExecutionPolicy&&, _ForwardIterator __first, _For return __internal::__brick_find_if(__first, __last, __pred, typename _Tag::__is_vector{}); } +// KSATODO were deleted in prototype? 
template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_find_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -740,16 +740,16 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran }); } -template +template _ForwardIterator -__pattern_find_if(__parallel_tag<_ExecutionPolicy, _ForwardIterator> __tag, _ExecutionPolicy&& __exec, +__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - using __tag_type = decltype(__tag); + using __backend_tag = typename decltype(__tag)::__backend_tag; return __except_handler([&]() { - return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return __parallel_find(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__pred](_ForwardIterator __i, _ForwardIterator __j) { - return __brick_find_if(__i, __j, __pred, typename __tag_type::__is_vector{}); + return __brick_find_if(__i, __j, __pred, _IsVector{}); }, ::std::true_type{}); }); diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index e75f7e6945d..d7743361f7b 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -131,48 +131,67 @@ __is_parallelization_preferred(_ExecutionPolicy& __exec) // backend selector with tags //------------------------------------------------------------------------ -template -struct __vectorable_tag -{ - using __is_vector = std::conjunction<__allow_unsequenced<_Policy>, - typename __internal::__is_random_access_iterator<_IteratorTypes...>>; -}; +struct __tbb_backend {}; +// KSATODO required create tag for onedpl +// KSATODO required create tag for omp -template -struct __serial_tag : __vectorable_tag<_Policy, _IteratorTypes...> +template +struct __serial_tag { + using __is_vector = 
_IsVector; }; -template -struct __parallel_tag : __vectorable_tag<_Policy, _IteratorTypes...> +template +struct __parallel_tag { + using __is_vector = _IsVector; + // backend tag can be change depending on + // TBB availability in the environment + using __backend_tag = __tbb_backend; }; -template -struct __parallel_forward_tag : __vectorable_tag<_Policy, _IteratorTypes...> +struct __parallel_forward_tag { + using __is_vector = ::std::false_type; + // backend tag can be change depending on + // TBB availability in the environment + using __backend_tag = __tbb_backend; }; -template +template using __tag_type = typename ::std::conditional<__internal::__is_random_access_iterator<_IteratorTypes...>::value, - __parallel_tag<_Policy, _IteratorTypes...>, + __parallel_tag<_IsVector>, typename ::std::conditional<__is_forward_iterator<_IteratorTypes...>::value, - __parallel_forward_tag<_Policy, _IteratorTypes...>, - __serial_tag<_Policy, _IteratorTypes...>>::type>::type; + __parallel_forward_tag, + __serial_tag<_IsVector> + >::type + >::type; + +template +__serial_tag +__select_backend(oneapi::dpl::execution::sequenced_policy, _IteratorTypes&&...) +{ + return {}; +} + +template +__serial_tag<__internal::__is_random_access_iterator<_IteratorTypes...>> +__select_backend(oneapi::dpl::execution::unsequenced_policy, _IteratorTypes&&...) +{ + return {}; +} -template -typename ::std::enable_if::value, - __serial_tag<_Policy, typename ::std::decay<_IteratorTypes>::type...>>::type -__select_backend(_Policy&&, _IteratorTypes&&...) +template +__tag_type +__select_backend(oneapi::dpl::execution::parallel_policy, _IteratorTypes&&...) { return {}; } -template -typename ::std::enable_if<__allow_parallel<_Policy>::value, - __tag_type<_Policy, typename ::std::decay<_IteratorTypes>::type...>>::type -__select_backend(_Policy&&, _IteratorTypes&&...) 
+template +__tag_type<__internal::__is_random_access_iterator<_IteratorTypes...>, _IteratorTypes...> +__select_backend(oneapi::dpl::execution::parallel_unsequenced_policy, _IteratorTypes&&...) { return {}; } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 9c2186d6bda..86651375bee 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -97,7 +97,7 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(std::forward<_ExecutionPolicy>(__exec), __first); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); return oneapi::dpl::__internal::__pattern_find_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); @@ -363,7 +363,7 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { - auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(::std::forward<_ExecutionPolicy>(__exec), __first); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); __pattern_replace_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, __new_value); } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 4a3c9ce13b8..1ec77c0d5ea 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -58,9 +58,9 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte .wait(); } -template +template void 
-__pattern_walk1(__offload_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, +__pattern_walk1(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) { auto __n = __last - __first; @@ -71,7 +71,7 @@ __pattern_walk1(__offload_tag, _ExecutionPolicy&& __exec, _ForwardIterator __fir oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); auto __buf = __keep(__first, __last); - oneapi::dpl::__par_backend_hetero::__parallel_for(__exec, unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, __exec, unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()) .wait(); } @@ -656,16 +656,16 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las // find_if //------------------------------------------------------------------------ -template +template _Iterator -__pattern_find_if(__offload_tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred) +__pattern_find_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred) { if (__first == __last) return __last; using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; - return __par_backend_hetero::__parallel_find( + return __par_backend_hetero::__parallel_find(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__pred}, diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index e8567f05442..6d4cad20373 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ 
b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -313,16 +313,35 @@ using __enable_if_device_execution_policy_double_no_default = oneapi::dpl::__internal::__is_convertible_to_event<_Events...>, _T>; -struct __offload_tag +template +struct __hetero_tag { + using __backend_tag = _BackendTag; }; -template -typename ::std::enable_if<__is_random_access_iterator<_IteratorTypes...>::value, __offload_tag>::type -__select_backend(const execution::device_policy<_PolicyParams...>&, _IteratorTypes&&...) +struct __device_backend +{ +}; + +template +typename ::std::enable_if<__is_random_access_iterator<_IteratorTypes...>::value, __hetero_tag<__device_backend>>::type +__select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...) { return {}; -} +} + +#if _ONEDPL_FPGA_DEVICE +struct __fpga_backend : __device_backend +{ +}; + +template +typename ::std::enable_if<__is_random_access_iterator<_IteratorTypes...>::value, __hetero_tag<__fpga_backend>>::type +__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...) +{ + return {}; +} +#endif } // namespace __internal diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 2335bad252e..4b23fc1d922 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -257,6 +257,31 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& ::std::forward<_Ranges>(__rngs)...); } +template +__future +__parallel_for(oneapi::dpl::__internal::__device_backend, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +{ + assert(__get_first_range_size(__rngs...) 
> 0); + + using _Policy = typename ::std::decay<_ExecutionPolicy>::type; + using _CustomName = typename _Policy::kernel_name; + using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::_KernelName_t<__parallel_for_kernel, _CustomName, + _Fp, _Ranges...>; + + _PRINT_INFO_IN_DEBUG_MODE(__exec); + auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { + //get an access to data under SYCL buffer: + oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); + + __cgh.parallel_for<_ForKernel>(sycl::range(__count), [=](sycl::item __item_id) { + auto __idx = __item_id.get_linear_id(); + __brick(__idx, __rngs...); + }); + }); + + return __future(__event); +} + //------------------------------------------------------------------------ // parallel_transform_scan - async pattern //------------------------------------------------------------------------ @@ -1263,6 +1288,22 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, __f, _TagType{}, __buf.all_view()); } +template +_Iterator +__parallel_find(oneapi::dpl::__internal::__device_backend, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) +{ + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf = __keep(__first, __last); + + using _TagType = + typename ::std::conditional<_IsFirst::value, __parallel_find_forward_tag, + __parallel_find_backward_tag>::type; + return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), + __f, _TagType{}, __buf.all_view()); +} + //------------------------------------------------------------------------ // parallel_merge - async pattern //----------------------------------------------------------------------- diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 8f9108c085e..4949aa48aaf 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -88,6 +88,38 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& __count, std::forward<_Ranges>(__rngs)...); } +template +__future +__parallel_for(oneapi::dpl::__internal::__fpga_backend, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +{ + auto __n = __get_first_range_size(__rngs...); + assert(__n > 0); + + using _Policy = typename ::std::decay<_ExecutionPolicy>::type; + using __kernel_name = typename _Policy::kernel_name; +#if __SYCL_UNNAMED_LAMBDA__ + using __kernel_name_t = __parallel_for_kernel<_Fp, __kernel_name, _Ranges...>; +#else + using __kernel_name_t = __parallel_for_kernel<__kernel_name>; +#endif + + _PRINT_INFO_IN_DEBUG_MODE(__exec); + auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { + //get an access to data under SYCL buffer: + oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); + + __cgh.single_task<__kernel_name_t>([=]() { +#pragma unroll(::std::decay <_ExecutionPolicy>::type::unroll_factor) + for (auto __idx = 0; __idx < __count; ++__idx) + { + __brick(__idx, __rngs...); + } + }); + }); + + return __future(__event); +} + //------------------------------------------------------------------------ // parallel_transform_reduce //------------------------------------------------------------------------ @@ -228,6 +260,7 @@ oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _It __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) { + // workaround until we implement more performant version for patterns using _Policy = ::std::decay_t<_ExecutionPolicy>; using 
__kernel_name = typename _Policy::kernel_name; @@ -247,6 +280,17 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, return oneapi::dpl::__par_backend_hetero::__parallel_find(__device_policy, __first, __last, __f, __is_first); } +template +_Iterator +__parallel_find(oneapi::dpl::__internal::__fpga_backend __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst __is_first) +{ + // workaround until we implement more performant version for patterns + using _Policy = typename ::std::decay<_ExecutionPolicy>::type; + using __kernel_name = typename _Policy::kernel_name; + auto __device_policy = oneapi::dpl::execution::make_device_policy<__kernel_name>(__exec.queue()); + return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend{}, __device_policy, __first, __last, __f, __is_first); +} + template auto __device_policy(_ExecutionPolicy&& __exec) diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index 210b7e0ca82..6cd2f6cb204 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -54,21 +54,21 @@ struct __iterator_traits<_Tp*, void> : ::std::iterator_traits<_Tp*> //SFINAE with a non-iterator type by providing a default value. template auto -__is_needed_iter(int) - -> decltype(std::conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); +__is_iterator_of(int) + -> decltype(__conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); template auto __is_needed_iter(...) 
-> ::std::false_type; template -struct __is_random_access_iterator : decltype(__is_needed_iter<::std::random_access_iterator_tag, _IteratorTypes...>(0)) +struct __is_random_access_iterator : decltype(__is_iterator_of<::std::random_access_iterator_tag, _IteratorTypes...>(0)) { }; template -struct __is_forward_iterator : decltype(__is_needed_iter<::std::forward_iterator_tag, _IteratorTypes...>(0)) +struct __is_forward_iterator : decltype(__is_iterator_of<::std::forward_iterator_tag, _IteratorTypes...>(0)) { }; diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 556e305e1c7..67615fffc2b 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -125,6 +125,17 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) }); } +//! Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) +// wrapper over tbb::parallel_for +template +void +__parallel_for(oneapi::dpl::__internal::__tbb_backend, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +{ + tbb::this_task_arena::isolate([=]() { + tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), __parallel_for_body<_Index, _Fp>(__f)); + }); +} + //! 
Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) // wrapper over tbb::parallel_reduce template @@ -1322,6 +1333,13 @@ __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterat tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); } +template +void +__parallel_for_each(oneapi::dpl::__internal::__tbb_backend, _ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) +{ + tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); +} + } // namespace __tbb_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index a2d7d20e562..780a495897d 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -34,7 +34,7 @@ namespace __internal Each f[i,j) must return a value in [i,j). */ template _Index -__parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) +__parallel_find(__tbb_backend __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) { typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; @@ -44,7 +44,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick ::std::atomic<_DifferenceType> __extremum(__initial_dist); // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of // why using a shared variable scales fairly well in this situation. 
From 7005dc7558fa0c333e94cc4dbd0de408ec0b65f5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Thu, 19 Oct 2023 15:39:07 +0200 Subject: [PATCH 004/566] Replace struct __conjunction -> std::conjunction --- include/oneapi/dpl/pstl/iterator_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index 6cd2f6cb204..524b3e887ff 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -55,8 +55,8 @@ struct __iterator_traits<_Tp*, void> : ::std::iterator_traits<_Tp*> template auto __is_iterator_of(int) - -> decltype(__conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); + -> decltype(std::conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); template auto From 73d3ad3b8ea6afc844ff776e8f3a6bf5579de1bb Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Thu, 19 Oct 2023 15:44:32 +0200 Subject: [PATCH 005/566] Fix compile error: cherry pick error #1 --- include/oneapi/dpl/pstl/algorithm_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 2d1f115a76e..e35f2465bc2 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -189,7 +189,7 @@ __pattern_replace_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __f oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred)); } -template +//template template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> From c5f1a11a7806f6c79a3b2bbfd5647fdd3efa29de Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Thu, 19 Oct 2023 17:49:02 +0200 Subject: [PATCH 006/566] Fix compile error: __future - 
too old code #2 --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 4 ++-- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 4b23fc1d922..d2760f5ee17 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -258,7 +258,7 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& } template -__future +auto __parallel_for(oneapi::dpl::__internal::__device_backend, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) { assert(__get_first_range_size(__rngs...) > 0); @@ -279,7 +279,7 @@ __parallel_for(oneapi::dpl::__internal::__device_backend, _ExecutionPolicy&& __e }); }); - return __future(__event); + return __future(__event); } //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 4949aa48aaf..36d755f5734 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -89,7 +89,7 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& } template -__future +auto __parallel_for(oneapi::dpl::__internal::__fpga_backend, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... 
__rngs) { auto __n = __get_first_range_size(__rngs...); @@ -117,7 +117,7 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend, _ExecutionPolicy&& __exe }); }); - return __future(__event); + return __future(__event); } //------------------------------------------------------------------------ From 0609db5d4b7797981326790c280c9a3732c0ae6f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Thu, 19 Oct 2023 17:57:45 +0200 Subject: [PATCH 007/566] Fix compile error: no template named '_KernelName_t' in namespace 'oneapi::dpl::__par_backend_hetero::__internal' #3 --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index d2760f5ee17..51854c08a1c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -180,6 +180,9 @@ make_iter_mode(const _Iterator& __it) -> decltype(iter_mode()(__it)) // set of class templates to name kernels +template +class __parallel_for_kernel; + template class __scan_local_kernel; @@ -265,8 +268,9 @@ __parallel_for(oneapi::dpl::__internal::__device_backend, _ExecutionPolicy&& __e using _Policy = typename ::std::decay<_ExecutionPolicy>::type; using _CustomName = typename _Policy::kernel_name; - using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::_KernelName_t<__parallel_for_kernel, _CustomName, - _Fp, _Ranges...>; + using _ForKernel = + oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_generator<__parallel_for_kernel, _CustomName, _Fp, + _Ranges...>; _PRINT_INFO_IN_DEBUG_MODE(__exec); auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { From 243c6620f4cb84831be796124276cd8675d8dcb9 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 20 Oct 2023 10:14:46 +0200 Subject: [PATCH 008/566] 
include/oneapi/dpl/pstl/parallel_backend_tbb.h - fix compile error #4 in header_inclusion_order_algorithm_1.pass : parallel_backend_tbb.h:132:41: error: no type named '__tbb_backend' in namespace 'oneapi::dpl::__internal' include/oneapi/dpl/pstl/parallel_backend_tbb.h:132:41: error: no type named '__tbb_backend' in namespace 'oneapi::dpl::__internal' __parallel_for(oneapi::dpl::__internal::__tbb_backend, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) ~~~~~~~~~~~~~~~~~~~~~~~~~^ --- include/oneapi/dpl/pstl/execution_impl.h | 6 +++--- include/oneapi/dpl/pstl/parallel_backend_tbb.h | 5 +++-- include/oneapi/dpl/pstl/parallel_impl.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index d7743361f7b..7e747db1223 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -131,7 +131,7 @@ __is_parallelization_preferred(_ExecutionPolicy& __exec) // backend selector with tags //------------------------------------------------------------------------ -struct __tbb_backend {}; +struct __tbb_backend_tag {}; // KSATODO required create tag for onedpl // KSATODO required create tag for omp @@ -147,7 +147,7 @@ struct __parallel_tag using __is_vector = _IsVector; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = __tbb_backend; + using __backend_tag = __tbb_backend_tag; }; struct __parallel_forward_tag @@ -155,7 +155,7 @@ struct __parallel_forward_tag using __is_vector = ::std::false_type; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = __tbb_backend; + using __backend_tag = __tbb_backend_tag; }; template diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 67615fffc2b..d3663b2d301 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ 
b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -23,6 +23,7 @@ #include #include "parallel_backend_utils.h" +#include "execution_impl.h" // Bring in minimal required subset of Intel(R) Threading Building Blocks (Intel(R) TBB) #include @@ -129,7 +130,7 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) // wrapper over tbb::parallel_for template void -__parallel_for(oneapi::dpl::__internal::__tbb_backend, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) { tbb::this_task_arena::isolate([=]() { tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), __parallel_for_body<_Index, _Fp>(__f)); @@ -1335,7 +1336,7 @@ __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterat template void -__parallel_for_each(oneapi::dpl::__internal::__tbb_backend, _ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) { tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); } diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index 780a495897d..cc92205b360 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -34,7 +34,7 @@ namespace __internal Each f[i,j) must return a value in [i,j). 
*/ template _Index -__parallel_find(__tbb_backend __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) +__parallel_find(__tbb_backend_tag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) { typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; From 18ef3e7dfa695a5a7646e363516323748c398899 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 20 Oct 2023 12:12:02 +0200 Subject: [PATCH 009/566] Fix cherry pick error #5: delete previously not removed code --- include/oneapi/dpl/pstl/algorithm_impl.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index e35f2465bc2..125c8ca312f 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -724,22 +724,6 @@ __pattern_find_if(_Tag __tag, _ExecutionPolicy&&, _ForwardIterator __first, _For return __internal::__brick_find_if(__first, __last, __pred, typename _Tag::__is_vector{}); } -// KSATODO were deleted in prototype? 
-template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_find_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Predicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - return __except_handler([&]() { - return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __brick_find_if(__i, __j, __pred, __is_vector); - }, - ::std::true_type{}); - }); -} - template _ForwardIterator __pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, From 8d2efba6254c93edef996b4d7f8381e89f20e1b6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 20 Oct 2023 12:18:23 +0200 Subject: [PATCH 010/566] include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h - using __is_random_access_iterator_v instead of __is_random_access_iterator<_IteratorTypes...>::value --- include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index 6d4cad20373..c77247e71eb 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -324,7 +324,7 @@ struct __device_backend }; template -typename ::std::enable_if<__is_random_access_iterator<_IteratorTypes...>::value, __hetero_tag<__device_backend>>::type +typename ::std::enable_if<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__device_backend>>::type __select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...) 
{ return {}; @@ -336,7 +336,7 @@ struct __fpga_backend : __device_backend }; template -typename ::std::enable_if<__is_random_access_iterator<_IteratorTypes...>::value, __hetero_tag<__fpga_backend>>::type +typename ::std::enable_if<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__fpga_backend>>::type __select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...) { return {}; From e67b8709cbc05235523ef03cfb4d3bb7af6e7cbc Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 20 Oct 2023 17:47:50 +0200 Subject: [PATCH 011/566] include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h - using std::enable_if_t instead of typename ::std::enable_if<...>::type --- include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index c77247e71eb..75d1f54885d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -324,7 +324,7 @@ struct __device_backend }; template -typename ::std::enable_if<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__device_backend>>::type +::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__device_backend>> __select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...) { return {}; @@ -336,7 +336,7 @@ struct __fpga_backend : __device_backend }; template -typename ::std::enable_if<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__fpga_backend>>::type +::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__fpga_backend>> __select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...) 
{ return {}; From 0b27fc29626be303ff98852fd2b6ceaeb1d957ed Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 09:55:57 +0200 Subject: [PATCH 012/566] Rename __device_backend to __device_backend_tag --- include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h | 6 +++--- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 4 ++-- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index 75d1f54885d..6fe53805c28 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -319,19 +319,19 @@ struct __hetero_tag using __backend_tag = _BackendTag; }; -struct __device_backend +struct __device_backend_tag { }; template -::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__device_backend>> +::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__device_backend_tag>> __select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...) { return {}; } #if _ONEDPL_FPGA_DEVICE -struct __fpga_backend : __device_backend +struct __fpga_backend : __device_backend_tag { }; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 51854c08a1c..7a9ce585b7a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -262,7 +262,7 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& template auto -__parallel_for(oneapi::dpl::__internal::__device_backend, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... 
__rngs) +__parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) { assert(__get_first_range_size(__rngs...) > 0); @@ -1294,7 +1294,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, template _Iterator -__parallel_find(oneapi::dpl::__internal::__device_backend, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) +__parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 36d755f5734..374bd4dfc14 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -288,7 +288,7 @@ __parallel_find(oneapi::dpl::__internal::__fpga_backend __tag, _ExecutionPolicy& using _Policy = typename ::std::decay<_ExecutionPolicy>::type; using __kernel_name = typename _Policy::kernel_name; auto __device_policy = oneapi::dpl::execution::make_device_policy<__kernel_name>(__exec.queue()); - return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend{}, __device_policy, __first, __last, __f, __is_first); + return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, __device_policy, __first, __last, __f, __is_first); } template From 29ab80bb129fa4acb4aa188587ee942a40e5a28e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 09:57:46 +0200 Subject: [PATCH 013/566] Rename __fpga_backend to __fpga_backend_tag --- include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h 
| 4 ++-- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index 6fe53805c28..8a0e03f33f9 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -331,12 +331,12 @@ __select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&. } #if _ONEDPL_FPGA_DEVICE -struct __fpga_backend : __device_backend_tag +struct __fpga_backend_tag : __device_backend_tag { }; template -::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__fpga_backend>> +::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__fpga_backend_tag>> __select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...) { return {}; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 374bd4dfc14..4389e04132e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -90,7 +90,7 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& template auto -__parallel_for(oneapi::dpl::__internal::__fpga_backend, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... 
__rngs) { auto __n = __get_first_range_size(__rngs...); assert(__n > 0); @@ -282,7 +282,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, template _Iterator -__parallel_find(oneapi::dpl::__internal::__fpga_backend __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst __is_first) +__parallel_find(oneapi::dpl::__internal::__fpga_backend_tag __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst __is_first) { // workaround until we implement more performant version for patterns using _Policy = typename ::std::decay<_ExecutionPolicy>::type; From d5f80ee21e49c60fa9d3955cf2c42af992fb3bf0 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 10:22:52 +0200 Subject: [PATCH 014/566] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - Please add overload for operator() of the __parallel_for_submitter similar to what we have on line 231. It should take the __device_backend_tag as the first argument --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 7a9ce585b7a..453bbd50782 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -243,6 +243,25 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>> }); return __future(__event); } + + template + auto + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... __rngs) const + { + assert(oneapi::dpl::__ranges::__get_first_range_size(__rngs...) 
> 0); + _PRINT_INFO_IN_DEBUG_MODE(__exec); + auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { + //get an access to data under SYCL buffer: + oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); + + __cgh.parallel_for<_Name...>(sycl::range(__count), [=](sycl::item __item_id) { + auto __idx = __item_id.get_linear_id(); + __brick(__idx, __rngs...); + }); + }); + return __future(__event); + } }; //General version of parallel_for, one additional parameter - __count of iterations of loop __cgh.parallel_for, From 7e11f557cd463ffef285710f5642742b58e3a4a0 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 10:24:34 +0200 Subject: [PATCH 015/566] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - Please add overload for __parallel_for similar to what we have on line 253. It should take the __device_backend_tag as the first argument --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 453bbd50782..98a010635a7 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -283,26 +283,13 @@ template 0); - - using _Policy = typename ::std::decay<_ExecutionPolicy>::type; + using _Policy = ::std::decay_t<_ExecutionPolicy>; using _CustomName = typename _Policy::kernel_name; - using _ForKernel = - oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_generator<__parallel_for_kernel, _CustomName, _Fp, - _Ranges...>; - - _PRINT_INFO_IN_DEBUG_MODE(__exec); - auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { - //get an access to data under SYCL buffer: - oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); - - __cgh.parallel_for<_ForKernel>(sycl::range(__count), 
[=](sycl::item __item_id) { - auto __idx = __item_id.get_linear_id(); - __brick(__idx, __rngs...); - }); - }); + using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<_CustomName>; - return __future(__event); + return __parallel_for_submitter<_ForKernel>()(oneapi::dpl::__internal::__device_backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __brick, __count, + ::std::forward<_Ranges>(__rngs)...); } //------------------------------------------------------------------------ From a9cf52cda88032286999c272165cf7863ec184b8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 10:25:44 +0200 Subject: [PATCH 016/566] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - remove class __parallel_for_kernel as not required anymore --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 98a010635a7..7c8530c004c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -180,9 +180,6 @@ make_iter_mode(const _Iterator& __it) -> decltype(iter_mode()(__it)) // set of class templates to name kernels -template -class __parallel_for_kernel; - template class __scan_local_kernel; From 1622733cefb4425cc174f90e66653d0575c45be8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 09:25:17 +0200 Subject: [PATCH 017/566] Support FPGA backend. 
Adopt offload tags --- include/oneapi/dpl/pstl/execution_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 7e747db1223..2b37babfae2 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -132,7 +132,7 @@ __is_parallelization_preferred(_ExecutionPolicy& __exec) //------------------------------------------------------------------------ struct __tbb_backend_tag {}; -// KSATODO required create tag for onedpl +// KSATODO required create tag for dpcpp -> already implemented: __device_backend_tag, __fpga_backend_tag // KSATODO required create tag for omp template From 94e25c05516235e7d47e161ba9d0ea9abd8721fb Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 11:25:11 +0200 Subject: [PATCH 018/566] include/oneapi/dpl/pstl/parallel_backend_tbb.h - remove #include "execution_impl.h" as not required --- include/oneapi/dpl/pstl/parallel_backend_tbb.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index d3663b2d301..3525e47ca5c 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -23,7 +23,6 @@ #include #include "parallel_backend_utils.h" -#include "execution_impl.h" // Bring in minimal required subset of Intel(R) Threading Building Blocks (Intel(R) TBB) #include From 0f2a4ec2dc568c8ec7ca71c8e7eb58128994e3c5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 17:13:21 +0200 Subject: [PATCH 019/566] include/oneapi/dpl/pstl/parallel_impl.h - restore __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) --- include/oneapi/dpl/pstl/parallel_impl.h | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/include/oneapi/dpl/pstl/parallel_impl.h 
b/include/oneapi/dpl/pstl/parallel_impl.h index cc92205b360..752db370260 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -32,6 +32,40 @@ namespace __internal //----------------------------------------------------------------------- /** Return extremum value returned by brick f[i,j) for subranges [i,j) of [first,last) Each f[i,j) must return a value in [i,j). */ +template +_Index +__parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) +{ + typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; + const _DifferenceType __n = __last - __first; + _DifferenceType __initial_dist = _IsFirst::value ? __n : -1; + + constexpr auto __comp = ::std::conditional_t<_IsFirst::value, __pstl_less, __pstl_greater>{}; + + ::std::atomic<_DifferenceType> __extremum(__initial_dist); + // TODO: find out what is better here: parallel_for or parallel_reduce + __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { + // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of + // why using a shared variable scales fairly well in this situation. + if (__comp(__i - __first, __extremum)) + { + _Index __res = __f(__i, __j); + // If not '__last' returned then we found what we want so put this to extremum + if (__res != __j) + { + const _DifferenceType __k = __res - __first; + for (_DifferenceType __old = __extremum; __comp(__k, __old); + __old = __extremum) + { + __extremum.compare_exchange_weak(__old, __k); + } + } + } + }); + return __extremum != __initial_dist ? 
__first + __extremum : __last; +} + template _Index __parallel_find(__tbb_backend_tag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) From 4d1fb6a456c721d406854e3c2cfe264db3cbd27a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 20:03:58 +0200 Subject: [PATCH 020/566] include/oneapi/dpl/pstl/iterator_defs.h - fix compile error in tbb backend and for_loop.pass --- include/oneapi/dpl/pstl/iterator_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index 524b3e887ff..5e190289540 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -60,7 +60,7 @@ __is_iterator_of(int) template auto -__is_needed_iter(...) -> ::std::false_type; +__is_iterator_of(...) -> ::std::false_type; template struct __is_random_access_iterator : decltype(__is_iterator_of<::std::random_access_iterator_tag, _IteratorTypes...>(0)) From e30b32575ed4ae5eee06a6129fa2e5b8cbf5df27 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 20:28:24 +0200 Subject: [PATCH 021/566] include/oneapi/dpl/pstl/glue_numeric_impl.h - fix compile error in test/general/header_inclusion_order_numeric_1.pass.cpp: no type named '__tbb_backend_tag' in namespace 'oneapi::dpl::__internal' [1/2] Building CXX object test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o FAILED: test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o /usr/bin/oneapi_compiler/linux_prod/compiler/linux/bin/icpx -DPSTL_USE_DEBUG -DTBB_USE_DEBUG=1 -D_PSTL_TEST_SUCCESSFUL_KEYWORD=1 -I/home/oneDPL/test -I/home/oneDPL/include -isystem /usr/lib/oneapi-tbb-2021.5.0/include -fsycl -fsycl-device-code-split=per_kernel -ftemplate-backtrace-limit=0 -O0 -g -fno-fast-math -fopenmp-simd -fsycl -std=c++17 -MD -MT 
test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o -MF test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o.d -o test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o -c /home/oneDPL/test/general/header_inclusion_order_numeric_1.pass.cpp In file included from /home/oneDPL/test/general/header_inclusion_order_numeric_1.pass.cpp:19: In file included from /home/oneDPL/include/oneapi/dpl/execution:47: In file included from /home/oneDPL/include/oneapi/dpl/pstl/glue_numeric_impl.h:24: In file included from /home/oneDPL/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h:22: In file included from /home/oneDPL/include/oneapi/dpl/pstl/hetero/../parallel_backend.h:21: /home/oneDPL/include/oneapi/dpl/pstl/parallel_backend_tbb.h:132:41: error: no type named '__tbb_backend_tag' in namespace 'oneapi::dpl::__internal' __parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) ~~~~~~~~~~~~~~~~~~~~~~~~~^ /home/oneDPL/include/oneapi/dpl/pstl/parallel_backend_tbb.h:1338:46: error: no type named '__tbb_backend_tag' in namespace 'oneapi::dpl::__internal' __parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) ~~~~~~~~~~~~~~~~~~~~~~~~~^ --- include/oneapi/dpl/pstl/glue_numeric_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index f2564db3132..f9347a5f7ac 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -20,14 +20,14 @@ #include "utils.h" +#include "numeric_fwd.h" +#include "execution_impl.h" + #if _ONEDPL_HETERO_BACKEND # include "hetero/algorithm_impl_hetero.h" # include 
"hetero/numeric_impl_hetero.h" #endif -#include "numeric_fwd.h" -#include "execution_impl.h" - namespace oneapi { namespace dpl From e104737c06ad38f0cc9279ed87510961c87c1d4d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 20:32:53 +0200 Subject: [PATCH 022/566] Revert "include/oneapi/dpl/pstl/glue_numeric_impl.h - fix compile error in test/general/header_inclusion_order_numeric_1.pass.cpp: no type named '__tbb_backend_tag' in namespace 'oneapi::dpl::__internal'" This reverts commit e30b32575ed4ae5eee06a6129fa2e5b8cbf5df27. --- include/oneapi/dpl/pstl/glue_numeric_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index f9347a5f7ac..f2564db3132 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -20,14 +20,14 @@ #include "utils.h" -#include "numeric_fwd.h" -#include "execution_impl.h" - #if _ONEDPL_HETERO_BACKEND # include "hetero/algorithm_impl_hetero.h" # include "hetero/numeric_impl_hetero.h" #endif +#include "numeric_fwd.h" +#include "execution_impl.h" + namespace oneapi { namespace dpl From 014d544183631866c602e3288a01c75a5de44037 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 20:33:56 +0200 Subject: [PATCH 023/566] include/oneapi/dpl/pstl/parallel_backend_tbb.h - fix compile error in test/general/header_inclusion_order_numeric_1.pass.cpp: no type named '__tbb_backend_tag' in namespace 'oneapi::dpl::__internal' [1/2] Building CXX object test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o FAILED: test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o /usr/bin/oneapi_compiler/linux_prod/compiler/linux/bin/icpx -DPSTL_USE_DEBUG -DTBB_USE_DEBUG=1 -D_PSTL_TEST_SUCCESSFUL_KEYWORD=1 -I/home/oneDPL/test -I/home/oneDPL/include -isystem 
/usr/lib/oneapi-tbb-2021.5.0/include -fsycl -fsycl-device-code-split=per_kernel -ftemplate-backtrace-limit=0 -O0 -g -fno-fast-math -fopenmp-simd -fsycl -std=c++17 -MD -MT test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o -MF test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o.d -o test/CMakeFiles/header_inclusion_order_numeric_1.pass.dir/general/header_inclusion_order_numeric_1.pass.cpp.o -c /home/oneDPL/test/general/header_inclusion_order_numeric_1.pass.cpp In file included from /home/oneDPL/test/general/header_inclusion_order_numeric_1.pass.cpp:19: In file included from /home/oneDPL/include/oneapi/dpl/execution:47: In file included from /home/oneDPL/include/oneapi/dpl/pstl/glue_numeric_impl.h:24: In file included from /home/oneDPL/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h:22: In file included from /home/oneDPL/include/oneapi/dpl/pstl/hetero/../parallel_backend.h:21: /home/oneDPL/include/oneapi/dpl/pstl/parallel_backend_tbb.h:132:41: error: no type named '__tbb_backend_tag' in namespace 'oneapi::dpl::__internal' __parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) ~~~~~~~~~~~~~~~~~~~~~~~~~^ /home/oneDPL/include/oneapi/dpl/pstl/parallel_backend_tbb.h:1338:46: error: no type named '__tbb_backend_tag' in namespace 'oneapi::dpl::__internal' __parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) ~~~~~~~~~~~~~~~~~~~~~~~~~^ --- include/oneapi/dpl/pstl/parallel_backend_tbb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 3525e47ca5c..d3663b2d301 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -23,6 +23,7 @@ #include 
#include "parallel_backend_utils.h" +#include "execution_impl.h" // Bring in minimal required subset of Intel(R) Threading Building Blocks (Intel(R) TBB) #include From f557579e2c2078d7074b441a13a05e0e24c7d5f3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 20:48:29 +0200 Subject: [PATCH 024/566] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h - fix error in __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) --- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 4389e04132e..dc0aafae8d1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -74,6 +74,33 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... }); return __future(__event); } + +// KSATODO is this define check really required here? +#if _ONEDPL_FPGA_DEVICE + template + auto + operator()(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... 
__rngs) const + { + auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); + assert(__n > 0); + + _PRINT_INFO_IN_DEBUG_MODE(__exec); + auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { + //get an access to data under SYCL buffer: + oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); + + __cgh.single_task<_Name...>([=]() { +#pragma unroll(::std::decay <_ExecutionPolicy>::type::unroll_factor) + for (auto __idx = 0; __idx < __count; ++__idx) + { + __brick(__idx, __rngs...); + } + }); + }); + return __future(__event); + } +#endif // _ONEDPL_FPGA_DEVICE }; template 0); - - using _Policy = typename ::std::decay<_ExecutionPolicy>::type; - using __kernel_name = typename _Policy::kernel_name; -#if __SYCL_UNNAMED_LAMBDA__ - using __kernel_name_t = __parallel_for_kernel<_Fp, __kernel_name, _Ranges...>; -#else - using __kernel_name_t = __parallel_for_kernel<__kernel_name>; -#endif - - _PRINT_INFO_IN_DEBUG_MODE(__exec); - auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { - //get an access to data under SYCL buffer: - oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); - - __cgh.single_task<__kernel_name_t>([=]() { -#pragma unroll(::std::decay <_ExecutionPolicy>::type::unroll_factor) - for (auto __idx = 0; __idx < __count; ++__idx) - { - __brick(__idx, __rngs...); - } - }); - }); + using _Policy = ::std::decay_t<_ExecutionPolicy>; + using __parallel_for_name = __internal::__kernel_name_provider; - return __future(__event); + return __parallel_for_fpga_submitter<__parallel_for_name>()(oneapi::dpl::__internal::__fpga_backend_tag{}, + std::forward<_ExecutionPolicy>(__exec), __brick, + __count, std::forward<_Ranges>(__rngs)...); } //------------------------------------------------------------------------ From c01e222d556ddc8b44958fc2fcdeba4afbe9bf1c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Oct 2023 20:55:59 +0200 Subject: [PATCH 025/566] Apply 
clang-format from gitHUB --- include/oneapi/dpl/pstl/algorithm_impl.h | 29 ++++++++++--------- include/oneapi/dpl/pstl/execution_impl.h | 16 +++++----- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 15 +++++----- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 6 ++-- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 11 ++++--- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 3 +- include/oneapi/dpl/pstl/tuple_impl.h | 3 +- 7 files changed, 44 insertions(+), 39 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 125c8ca312f..5d395d48754 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -136,8 +136,7 @@ __brick_walk1(_DifferenceType __n, _Function __f, ::std::true_type) noexcept template void -__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, - _ForwardIterator __last, _Function __f) noexcept +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f) noexcept { __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{}); } @@ -153,20 +152,21 @@ __pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator _ template void -__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, - _ForwardIterator __first, _ForwardIterator __last, _Function __f) +__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) { typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; auto __func = [&__f](_ReferenceType arg) { __f(arg); }; __internal::__except_handler([&]() { - __par_backend::__parallel_for_each(__parallel_forward_tag::__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); + __par_backend::__parallel_for_each(__parallel_forward_tag::__backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); }); 
} template void -__pattern_walk1(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, - _ForwardIterator __first, _ForwardIterator __last, _Function __f) +__pattern_walk1(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { using __backend_tag = typename decltype(__tag)::__backend_tag; __internal::__except_handler([&]() { @@ -726,16 +726,17 @@ __pattern_find_if(_Tag __tag, _ExecutionPolicy&&, _ForwardIterator __first, _For template _ForwardIterator -__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, - _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) +__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Predicate __pred) { using __backend_tag = typename decltype(__tag)::__backend_tag; return __except_handler([&]() { - return __parallel_find(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred](_ForwardIterator __i, _ForwardIterator __j) { - return __brick_find_if(__i, __j, __pred, _IsVector{}); - }, - ::std::true_type{}); + return __parallel_find( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_ForwardIterator __i, _ForwardIterator __j) { + return __brick_find_if(__i, __j, __pred, _IsVector{}); + }, + ::std::true_type{}); }); } diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 2b37babfae2..a187ddb50cf 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -131,7 +131,9 @@ __is_parallelization_preferred(_ExecutionPolicy& __exec) // backend selector with tags //------------------------------------------------------------------------ -struct __tbb_backend_tag {}; +struct __tbb_backend_tag +{ +}; // KSATODO required create tag for dpcpp -> already implemented: __device_backend_tag, 
__fpga_backend_tag // KSATODO required create tag for omp @@ -159,14 +161,10 @@ struct __parallel_forward_tag }; template -using __tag_type = - typename ::std::conditional<__internal::__is_random_access_iterator<_IteratorTypes...>::value, - __parallel_tag<_IsVector>, - typename ::std::conditional<__is_forward_iterator<_IteratorTypes...>::value, - __parallel_forward_tag, - __serial_tag<_IsVector> - >::type - >::type; +using __tag_type = typename ::std::conditional< + __internal::__is_random_access_iterator<_IteratorTypes...>::value, __parallel_tag<_IsVector>, + typename ::std::conditional<__is_forward_iterator<_IteratorTypes...>::value, __parallel_forward_tag, + __serial_tag<_IsVector>>::type>::type; template __serial_tag diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 1ec77c0d5ea..8dda2bb2088 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -60,8 +60,8 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte template void -__pattern_walk1(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _Function __f) +__pattern_walk1(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { auto __n = __last - __first; if (__n <= 0) @@ -71,8 +71,8 @@ __pattern_walk1(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _For oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); auto __buf = __keep(__first, __last); - oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, __exec, unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, - __n, __buf.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, __exec, unseq_backend::walk_n<_ExecutionPolicy, 
_Function>{__f}, __n, __buf.all_view()) .wait(); } @@ -658,15 +658,16 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las template _Iterator -__pattern_find_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred) +__pattern_find_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Pred __pred) { if (__first == __last) return __last; using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; - return __par_backend_hetero::__parallel_find(_BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), + return __par_backend_hetero::__parallel_find( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__pred}, ::std::true_type{}); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 7c8530c004c..fe1f6e4c202 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -278,7 +278,8 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& template auto -__parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... 
__rngs) { using _Policy = ::std::decay_t<_ExecutionPolicy>; using _CustomName = typename _Policy::kernel_name; @@ -1297,7 +1298,8 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, template _Iterator -__parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) +__parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index dc0aafae8d1..d9552030354 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -91,7 +91,7 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); __cgh.single_task<_Name...>([=]() { -#pragma unroll(::std::decay <_ExecutionPolicy>::type::unroll_factor) +# pragma unroll(::std::decay <_ExecutionPolicy>::type::unroll_factor) for (auto __idx = 0; __idx < __count; ++__idx) { __brick(__idx, __rngs...); @@ -117,7 +117,8 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& template auto -__parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... 
__rngs) { using _Policy = ::std::decay_t<_ExecutionPolicy>; using __parallel_for_name = __internal::__kernel_name_provider; @@ -289,13 +290,15 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, template _Iterator -__parallel_find(oneapi::dpl::__internal::__fpga_backend_tag __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst __is_first) +__parallel_find(oneapi::dpl::__internal::__fpga_backend_tag __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Brick __f, _IsFirst __is_first) { // workaround until we implement more performant version for patterns using _Policy = typename ::std::decay<_ExecutionPolicy>::type; using __kernel_name = typename _Policy::kernel_name; auto __device_policy = oneapi::dpl::execution::make_device_policy<__kernel_name>(__exec.queue()); - return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, __device_policy, __first, __last, __f, __is_first); + return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, + __device_policy, __first, __last, __f, __is_first); } template diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index d3663b2d301..67db87432e5 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1336,7 +1336,8 @@ __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterat template void -__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, + _ForwardIterator __end, _Fp __f) { tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); } diff --git a/include/oneapi/dpl/pstl/tuple_impl.h 
b/include/oneapi/dpl/pstl/tuple_impl.h index e5dc3bdf859..370751a19ae 100644 --- a/include/oneapi/dpl/pstl/tuple_impl.h +++ b/include/oneapi/dpl/pstl/tuple_impl.h @@ -341,8 +341,7 @@ struct tuple template , - ::std::is_constructible...>::value)>> + std::conjunction<::std::is_constructible, ::std::is_constructible...>::value)>> tuple(_U1&& _value, _U&&... _next) : holder(::std::forward<_U1>(_value)), next(::std::forward<_U>(_next)...) { } From 8cb876c2a8f4a856ee8f14c0a3f71a95cb0f6338 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 27 Oct 2023 12:43:03 +0200 Subject: [PATCH 026/566] Code improvements in tag dispatch prototype for current code base (#1249) --- include/oneapi/dpl/pstl/execution_impl.h | 7 ++- .../experimental/internal/for_loop_impl.h | 17 +++---- include/oneapi/dpl/pstl/iterator_defs.h | 49 +++++++------------ 3 files changed, 28 insertions(+), 45 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index a187ddb50cf..06e7aa4d675 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -161,10 +161,9 @@ struct __parallel_forward_tag }; template -using __tag_type = typename ::std::conditional< - __internal::__is_random_access_iterator<_IteratorTypes...>::value, __parallel_tag<_IsVector>, - typename ::std::conditional<__is_forward_iterator<_IteratorTypes...>::value, __parallel_forward_tag, - __serial_tag<_IsVector>>::type>::type; +using __tag_type = ::std::conditional_t< + __internal::__is_random_access_iterator_v<_IteratorTypes...>, __parallel_tag<_IsVector>, + ::std::conditional_t<__is_forward_iterator_v<_IteratorTypes...>, __parallel_forward_tag, __serial_tag<_IsVector>>>; template __serial_tag diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index d7f738036c3..52163caf0f8 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ 
b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -65,7 +65,7 @@ struct __difference<_Ip, ::std::enable_if_t<::std::is_integral_v<_Ip>>> template struct __difference<_Ip, ::std::enable_if_t>> { - using __type = typename oneapi::dpl::__internal::__iterator_traits<_Ip>::difference_type; + using __type = typename ::std::iterator_traits<_Ip>::difference_type; }; // This type is used as a stride value when it's known that stride == 1 at compile time(the case of for_loop and for_loop_n). @@ -232,9 +232,9 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function } template -::std::enable_if_t<::std::is_same_v::iterator_category, - ::std::bidirectional_iterator_tag>, - _IndexType> +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::bidirectional_iterator_tag>, + _IndexType> __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept { _IndexType __ordinal_position = 0; @@ -269,11 +269,10 @@ __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pa } template -::std::enable_if_t<::std::is_same_v::iterator_category, - ::std::forward_iterator_tag> || - ::std::is_same_v::iterator_category, - ::std::input_iterator_tag>, - _IndexType> +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::forward_iterator_tag> || + ::std::is_same_v::iterator_category, ::std::input_iterator_tag>, + _IndexType> __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept { _IndexType __ordinal_position = 0; diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index 5e190289540..7106ee19a4e 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -27,36 +27,13 @@ namespace dpl namespace __internal { -// Internal wrapper around ::std::iterator_traits as it is required to be -// SFINAE-friendly(not produce "hard" error 
when _Ip is not an iterator) -// only starting with C++17. Although many standard library implementations -// provide it for older versions, we cannot rely on that. -template -struct __iterator_traits -{ -}; - -template -struct __iterator_traits<_Ip, - __void_type> - : ::std::iterator_traits<_Ip> -{ -}; - -// Handles _Tp* and const _Tp* specializations -template -struct __iterator_traits<_Tp*, void> : ::std::iterator_traits<_Tp*> -{ -}; - // Make is_random_access_iterator and is_forward_iterator not to fail with a 'hard' error when it's used in -//SFINAE with a non-iterator type by providing a default value. +// SFINAE with a non-iterator type by providing a default value. template auto -__is_iterator_of(int) - -> decltype(std::conjunction<::std::is_base_of<_IteratorTag, typename __iterator_traits::type>::iterator_category>...>{}); +__is_iterator_of(int) -> decltype( + std::conjunction<::std::is_base_of< + _IteratorTag, typename ::std::iterator_traits<::std::decay_t<_IteratorTypes>>::iterator_category>...>{}); template auto @@ -78,28 +55,36 @@ using __is_random_access_iterator_t = typename __is_random_access_iterator<_Iter template inline constexpr bool __is_random_access_iterator_v = __is_random_access_iterator<_IteratorTypes...>::value; +template +inline constexpr bool __is_forward_iterator_v = __is_forward_iterator<_IteratorTypes...>::value; + // struct for checking if iterator is heterogeneous or not -template // for non-heterogeneous iterators +// for non-heterogeneous iterators +template struct is_hetero_iterator : ::std::false_type { }; -template // for heterogeneous iterators +// for heterogeneous iterators +template struct is_hetero_iterator> : ::std::true_type { }; // struct for checking if iterator should be passed directly to device or not -template // for iterators that should not be passed directly +// for iterators that should not be passed directly +template struct is_passed_directly : ::std::false_type { }; -template // for iterators defined as 
direct pass +// for iterators defined as direct pass +template struct is_passed_directly> : ::std::true_type { }; -template // for pointers to objects on device +// for pointers to objects on device +template struct is_passed_directly>> : ::std::true_type { }; From 7edb8736f4ed3e8509e5051d430dd5d8ba5d0edf Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Fri, 27 Oct 2023 12:47:01 +0200 Subject: [PATCH 027/566] include/oneapi/dpl/pstl/glue_algorithm_impl.h - pass empty iterator instance into __select_backend insttead of __first into __select_backend call --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 86651375bee..15782092570 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -97,7 +97,7 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, _ForwardIterator{}); return oneapi::dpl::__internal::__pattern_find_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); @@ -363,7 +363,7 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { - auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, _ForwardIterator{}); __pattern_replace_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, __new_value); } From 
fb11f70eb94b9ede8d15f0149aaeba396a772d63 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 16:41:04 +0100 Subject: [PATCH 028/566] include/oneapi/dpl/pstl/execution_impl.h - remove KSATODO --- include/oneapi/dpl/pstl/execution_impl.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 0d237bac733..8069da72b0f 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -131,8 +131,6 @@ __is_parallelization_preferred(_ExecutionPolicy& __exec) struct __tbb_backend_tag { }; -// KSATODO required create tag for dpcpp -> already implemented: __device_backend_tag, __fpga_backend_tag -// KSATODO required create tag for omp template struct __serial_tag From 959a410f5082f9d9497fef1050c2a3bb10bf79ed Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 09:20:38 +0100 Subject: [PATCH 029/566] Implementation of ability to check __is_backend_tag, __is_backend_tag_serial_v, __is_backend_tag_v --- include/oneapi/dpl/pstl/execution_impl.h | 36 +++++++++++++++++++ .../pstl/hetero/dpcpp/execution_sycl_defs.h | 9 +++++ 2 files changed, 45 insertions(+) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 8069da72b0f..af425473bde 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -188,6 +188,42 @@ __select_backend(oneapi::dpl::execution::parallel_unsequenced_policy, _IteratorT return {}; } +template +struct __is_backend_tag : ::std::false_type +{ +}; + +template +struct __is_backend_tag_serial : ::std::false_type +{ +}; + +template +struct __is_backend_tag<__serial_tag<_IsVector>> : ::std::true_type +{ +}; + +template +struct __is_backend_tag_serial<__serial_tag<_IsVector>> : ::std::true_type +{ +}; + +template +struct __is_backend_tag<__parallel_tag<_IsVector>> : ::std::true_type +{ +}; + +template <> +struct 
__is_backend_tag<__parallel_forward_tag> : ::std::true_type +{ +}; + +template +inline constexpr bool __is_backend_tag_serial_v = __is_backend_tag_serial<::std::decay_t<_Tag>>::value; + +template +inline constexpr bool __is_backend_tag_v = __is_backend_tag<::std::decay_t<_Tag>>::value; + } // namespace __internal } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index 15fcfbb39f4..8c91c597c31 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -342,6 +342,15 @@ __select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorT } #endif + +template +struct __is_backend_tag; + +template +struct __is_backend_tag<__hetero_tag<_BackendTag>> : ::std::true_type +{ +}; + } // namespace __internal } // namespace dpl From 35e74492a732674cfbf96c873f84b83b5a30dead Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 29 Jan 2024 16:53:38 +0100 Subject: [PATCH 030/566] Declare __is_vectorization_preferred as constexpr without runtime params --- include/oneapi/dpl/pstl/algorithm_impl.h | 173 +++++++++--------- include/oneapi/dpl/pstl/execution_impl.h | 8 +- .../experimental/internal/for_loop_impl.h | 21 +-- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 140 ++++++-------- include/oneapi/dpl/pstl/glue_memory_impl.h | 54 +++--- include/oneapi/dpl/pstl/glue_numeric_impl.h | 20 +- 6 files changed, 191 insertions(+), 225 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 3b3759964ad..7b02f71722d 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -223,7 +223,7 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, 
_Brick __brick, /*parallel=*/::std::false_type) noexcept { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); __brick(__first, __last, __is_vector); } @@ -233,7 +233,7 @@ __pattern_walk_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _ _Brick __brick, /*parallel=*/::std::true_type) { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec); + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(); __internal::__except_handler([&]() { __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { @@ -285,7 +285,7 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick, /*is_parallel=*/::std::false_type) noexcept { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); return __brick(__first, __n, __is_vector); } @@ -294,7 +294,7 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Brick __brick, /*is_parallel=*/::std::true_type) { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec); + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(); return __internal::__except_handler([&]() { 
__par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { @@ -423,8 +423,8 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::false_type) noexcept { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(__exec); + constexpr auto __is_vector = + __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); return __brick(__first1, __last1, __first2, __is_vector); } @@ -435,9 +435,8 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>( - __exec); + constexpr auto __is_vector = + __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); return __except_handler([&]() { __par_backend::__parallel_for( @@ -483,9 +482,8 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>( - __exec); + constexpr auto __is_vector = + __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, 
_RandomAccessIterator2>(); return __except_handler([&]() { __par_backend::__parallel_for( @@ -502,8 +500,8 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::false_type) noexcept { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(__exec); + constexpr auto __is_vector = + __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); return __brick(__first1, __n, __first2, __is_vector); } @@ -909,14 +907,14 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _R else { return __internal::__except_handler([&]() { - return __internal::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, - __s_last, __pred, false, - __is_vector); - }, - ::std::false_type{}); + return __internal::__parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, + _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, + __is_vector); + }, + ::std::false_type{}); }); } } @@ -1013,14 +1011,14 @@ __pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ran else { return __internal::__except_handler([&]() { - return __internal::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, - __s_last, __pred, 
true, - __is_vector); - }, - ::std::true_type{}); + return __internal::__parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, + _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, + __is_vector); + }, + ::std::true_type{}); }); } } @@ -1492,8 +1490,8 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI return __local_min; } // find first iterator that should be removed - bool* __result = __internal::__brick_find_if(__mask + __i, __mask + __j, - [](bool __val) { return !__val; }, __is_vector); + bool* __result = __internal::__brick_find_if( + __mask + __i, __mask + __j, [](bool __val) { return !__val; }, __is_vector); if (__result - __mask == __j) { return __local_min; @@ -1520,8 +1518,8 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI __par_backend::__parallel_strict_scan( ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) { - return __internal::__brick_count(__mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, - __is_vector); + return __internal::__brick_count( + __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, __is_vector); }, ::std::plus<_DifferenceType>(), [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { @@ -2405,10 +2403,12 @@ __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) { __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { ::std::sort(__first, __last, __comp); }, - 
__last - __first); + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::sort(__first, __last, __comp); + }, + __last - __first); }); } @@ -2430,10 +2430,12 @@ __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Compare __comp, _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type) { __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { ::std::stable_sort(__first, __last, __comp); }, - __last - __first); + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::stable_sort(__first, __last, __comp); + }, + __last - __first); }); } @@ -2450,8 +2452,9 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f { auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = - [__comp](const auto& __a, const auto& __b) { return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; ::std::sort(__beg, __end, __cmp_f); } @@ -2463,19 +2466,21 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, _IsVector /*vector=*/, /*is_parallel=*/::std::true_type) { - static_assert(::std::is_move_constructible_v::value_type> - && ::std::is_move_constructible_v::value_type>, + static_assert( + ::std::is_move_constructible_v::value_type> && + 
::std::is_move_constructible_v::value_type>, "The keys and values should be move constructible in case of parallel execution."); auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = - [__comp](const auto& __a, const auto& __b) { return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, - [](auto __first, auto __last, auto __cmp) - { ::std::sort(__first, __last, __cmp); },__end - __beg); + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, + [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); }); } @@ -2565,23 +2570,24 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ __par_backend::__buffer<_ExecutionPolicy, _T1> __buf(__n1); _T1* __r = __buf.get(); - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, - [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { - _RandomAccessIterator1 __it = __first + (__i - __r); + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { + _RandomAccessIterator1 __it = __first + (__i - __r); - // 1. Copy elements from input to raw memory - for (_T1* __k = __i; __k != __j; ++__k, ++__it) - { - ::new (__k) _T2(*__it); - } + // 1. Copy elements from input to raw memory + for (_T1* __k = __i; __k != __j; ++__k, ++__it) + { + ::new (__k) _T2(*__it); + } - // 2. 
Sort elements in temporary buffer - if (__n2 < __j - __i) - ::std::partial_sort(__i, __i + __n2, __j, __comp); - else - ::std::sort(__i, __j, __comp); - }, - __n2); + // 2. Sort elements in temporary buffer + if (__n2 < __j - __i) + ::std::partial_sort(__i, __i + __n2, __j, __comp); + else + ::std::sort(__i, __j, __comp); + }, + __n2); // 3. Move elements from temporary buffer to output __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, @@ -2701,10 +2707,10 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __x; do { - __x = __internal::__pattern_partition(::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, - [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, - __is_vector, - /*is_parallel=*/::std::true_type()); + __x = __internal::__pattern_partition( + ::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, + [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, __is_vector, + /*is_parallel=*/::std::true_type()); --__x; if (__x != __first) { @@ -2942,8 +2948,8 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R return __internal::__remove_elements( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [&__pred, __is_vector](bool* __b, bool* __e, _RandomAccessIterator __it) { - __internal::__brick_walk2(__b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, - __is_vector); + __internal::__brick_walk2( + __b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, __is_vector); }, __is_vector); } @@ -3722,12 +3728,12 @@ __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) { return __internal::__except_handler([&]() { - return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __comp, 
__is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, - __comp, __is_vector); - }, - ::std::true_type{}); + return __parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, __is_vector); + }, + ::std::true_type{}); }); } @@ -4077,11 +4083,12 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat auto __result = __internal::__parallel_find( ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, &__comp, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - [&__comp](const _RefType1 __x, const _RefType2 __y) { - return !__comp(__x, __y) && !__comp(__y, __x); - }, - __is_vector) + return __internal::__brick_mismatch( + __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), + [&__comp](const _RefType1 __x, const _RefType2 __y) { + return !__comp(__x, __y) && !__comp(__y, __x); + }, + __is_vector) .first; }, ::std::true_type{}); diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index af425473bde..828f703333d 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -105,12 +105,10 @@ template using __allow_parallel = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_parallel; template -auto -__is_vectorization_preferred(_ExecutionPolicy& __exec) - -> decltype(__internal::__lazy_and(__exec.__allow_vector(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>())) +constexpr decltype(auto) +__is_vectorization_preferred() { - return 
__internal::__lazy_and(__exec.__allow_vector(), + return __internal::__lazy_and(::std::decay_t<_ExecutionPolicy>::__allow_vector(), __internal::__is_random_access_iterator_t<_IteratorTypes...>()); } diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index 52163caf0f8..d4827c11ea4 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -479,9 +479,9 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t<::std::is_integral_v<_Ip>>> { template static constexpr auto - __use_vector(_ExecutionPolicy&& __exec) -> decltype(__exec.__allow_vector()) + __use_vector() { - return __exec.__allow_vector(); + return ::std::decay_t<_ExecutionPolicy>::__allow_vector(); } template @@ -497,12 +497,9 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> { template static constexpr auto - __use_vector(_ExecutionPolicy&& __exec) - -> decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec))) + __use_vector() { - return oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec)); + return oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _Ip>(); } template @@ -519,10 +516,10 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> // Special versions for for_loop: handles both iterators and integral types(treated as random access iterators) template auto -__use_vectorization(_ExecutionPolicy&& __exec) - -> decltype(__use_par_vec_helper<_Ip>::__use_vector(::std::forward<_ExecutionPolicy>(__exec))) +__use_vectorization() + -> decltype(__use_par_vec_helper<_Ip>::template __use_vector<_ExecutionPolicy>()) { - return __use_par_vec_helper<_Ip>::__use_vector(::std::forward<_ExecutionPolicy>(__exec)); + return __use_par_vec_helper<_Ip>::template 
__use_vector<_ExecutionPolicy>(); } template @@ -541,7 +538,7 @@ __for_loop_impl(_ExecutionPolicy&& __exec, _Ip __start, _Ip __finish, _Fp&& __f, { oneapi::dpl::__internal::__pattern_for_loop( ::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __f, __stride, - oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(__exec), + oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(), oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(__exec), ::std::get<_Is>(::std::move(__t))...); } @@ -554,7 +551,7 @@ __for_loop_n_impl(_ExecutionPolicy&& __exec, _Ip __start, _Size __n, _Fp&& __f, { oneapi::dpl::__internal::__pattern_for_loop_n( ::std::forward<_ExecutionPolicy>(__exec), __start, __n, __f, __stride, - oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(__exec), + oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(), oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(__exec), ::std::get<_Is>(::std::move(__t))...); } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index b7164977ff4..ac1ff531bce 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -45,7 +45,7 @@ any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __l { return oneapi::dpl::__internal::__pattern_any_of( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -77,7 +77,7 @@ for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator _ { oneapi::dpl::__internal::__pattern_walk1( ::std::forward<_ExecutionPolicy>(__exec), __first, 
__last, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), __exec.__allow_parallel()); } @@ -87,7 +87,7 @@ for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Func { return oneapi::dpl::__internal::__pattern_walk1_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -131,8 +131,7 @@ find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 { return oneapi::dpl::__internal::__pattern_find_end( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -154,8 +153,7 @@ find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIter { return oneapi::dpl::__internal::__pattern_find_first_of( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -178,7 +176,7 @@ 
adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardItera return oneapi::dpl::__internal::__pattern_adjacent_find( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, ::std::equal_to<_ValueType>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__first_semantic()); } @@ -189,7 +187,7 @@ adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardItera return oneapi::dpl::__internal::__pattern_adjacent_find( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__first_semantic()); } @@ -208,7 +206,7 @@ count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __la oneapi::dpl::__internal::__equal_value>( __value), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -219,7 +217,7 @@ count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator _ return oneapi::dpl::__internal::__pattern_count( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, 
_ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } // [alg.search] @@ -231,8 +229,7 @@ search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 _ { return oneapi::dpl::__internal::__pattern_search( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -253,7 +250,7 @@ search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator _ { return oneapi::dpl::__internal::__pattern_search_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __count, __value, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -299,8 +296,7 @@ copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 { return oneapi::dpl::__internal::__pattern_copy_if( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -320,8 +316,7 @@ swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardItera using 
::std::swap; swap(__x, __y); }, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -336,8 +331,7 @@ transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator return oneapi::dpl::__internal::__pattern_walk2( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), __exec.__allow_parallel()); } @@ -351,8 +345,7 @@ transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterato return oneapi::dpl::__internal::__pattern_walk3( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), __exec.__allow_parallel()); } @@ -368,8 +361,7 @@ transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardItera ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_if_unary_functor<_UnaryOperation, _UnaryPredicate>(::std::move(__op), ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - 
__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -384,8 +376,7 @@ transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIter ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, oneapi::dpl::__internal::__transform_if_binary_functor<_BinaryOperation, _BinaryPredicate>(::std::move(__op), ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator3>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3>(__exec)); } @@ -427,8 +418,7 @@ replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIt _UnaryPredicate, oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>>( __new_value, __pred), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -454,7 +444,7 @@ fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __las oneapi::dpl::__internal::__pattern_fill( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __value, oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -467,7 +457,7 @@ fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const return oneapi::dpl::__internal::__pattern_fill_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __value, oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } // [alg.generate] @@ -478,7 +468,7 @@ generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator _ oneapi::dpl::__internal::__pattern_generate( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __g, oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -491,7 +481,7 @@ generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _ return oneapi::dpl::__internal::__pattern_generate_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __g, oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } // [alg.remove] @@ -524,7 +514,7 @@ remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator { return oneapi::dpl::__internal::__pattern_remove_if( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, 
_ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -546,7 +536,7 @@ unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __l { return oneapi::dpl::__internal::__pattern_unique( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -565,8 +555,7 @@ unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterat { return oneapi::dpl::__internal::__pattern_unique_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -587,7 +576,7 @@ reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidirectiona { oneapi::dpl::__internal::__pattern_reverse( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); } @@ -598,8 +587,7 @@ reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidirec { return 
oneapi::dpl::__internal::__pattern_reverse_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator, - _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>(__exec)); } @@ -612,7 +600,7 @@ rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __m { return oneapi::dpl::__internal::__pattern_rotate( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -623,8 +611,7 @@ rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterat { return oneapi::dpl::__internal::__pattern_rotate_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __result, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -637,7 +624,7 @@ is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIter { return oneapi::dpl::__internal::__pattern_is_partitioned( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -647,7 +634,7 @@ partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator { return oneapi::dpl::__internal::__pattern_partition( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -658,7 +645,7 @@ stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bid { return oneapi::dpl::__internal::__pattern_stable_partition( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); } @@ -671,8 +658,7 @@ partition_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIter { return oneapi::dpl::__internal::__pattern_partition_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __out_true, __out_false, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, - _ForwardIterator2>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, _ForwardIterator2>(__exec)); } @@ -686,7 +672,7 @@ sort(_ExecutionPolicy&& __exec, 
_RandomAccessIterator __first, _RandomAccessIter typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _InputType; oneapi::dpl::__internal::__pattern_sort( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), typename ::std::is_move_constructible<_InputType>::type()); } @@ -707,7 +693,7 @@ stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAcc { oneapi::dpl::__internal::__pattern_stable_sort( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); } @@ -729,8 +715,7 @@ sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, _Ran { oneapi::dpl::__internal::__pattern_sort_by_key( ::std::forward<_ExecutionPolicy>(__exec), __keys_first, __keys_last, __values_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(__exec)); } @@ -754,8 +739,7 @@ mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator { return oneapi::dpl::__internal::__pattern_mismatch( ::std::forward<_ExecutionPolicy>(__exec), __first1, 
__last1, __first2, __last2, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -800,7 +784,7 @@ equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 _ { return oneapi::dpl::__internal::__pattern_equal( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); } @@ -819,7 +803,7 @@ equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 _ { return oneapi::dpl::__internal::__pattern_equal( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); } @@ -855,7 +839,7 @@ partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAc { oneapi::dpl::__internal::__pattern_partial_sort( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, 
_RandomAccessIterator>(__exec)); } @@ -877,8 +861,7 @@ partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI { return oneapi::dpl::__internal::__pattern_partial_sort_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, - _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _RandomAccessIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _RandomAccessIterator>(__exec)); } @@ -901,7 +884,7 @@ is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__first_semantic()); return __res == __last ? 
__last : oneapi::dpl::__internal::__pstl_next(__res); } @@ -922,7 +905,7 @@ is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__or_semantic()) == __last; } @@ -943,8 +926,7 @@ merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 _ { return oneapi::dpl::__internal::__pattern_merge( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(__exec)); } @@ -965,7 +947,7 @@ inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidire { oneapi::dpl::__internal::__pattern_inplace_merge( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); } @@ -987,8 +969,7 @@ includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator { return oneapi::dpl::__internal::__pattern_includes( 
::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -1012,8 +993,7 @@ set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterato { return oneapi::dpl::__internal::__pattern_set_union( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(__exec)); } @@ -1037,8 +1017,7 @@ set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward { return oneapi::dpl::__internal::__pattern_set_intersection( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(__exec)); } @@ -1062,8 +1041,7 @@ set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIt { return oneapi::dpl::__internal::__pattern_set_difference( 
::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(__exec)); } @@ -1088,8 +1066,7 @@ set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, { return oneapi::dpl::__internal::__pattern_set_symmetric_difference( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(__exec)); } @@ -1110,7 +1087,7 @@ is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomA { return oneapi::dpl::__internal::__pattern_is_heap_until( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); } @@ -1128,7 +1105,7 @@ is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessI { return oneapi::dpl::__internal::__pattern_is_heap( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); } @@ -1148,7 +1125,7 @@ min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterato { return oneapi::dpl::__internal::__pattern_min_element( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -1183,7 +1160,7 @@ minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIter { return oneapi::dpl::__internal::__pattern_minmax_element( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -1204,7 +1181,7 @@ nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAcc { oneapi::dpl::__internal::__pattern_nth_element( ::std::forward<_ExecutionPolicy>(__exec), __first, __nth, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); } @@ -1226,8 +1203,7 @@ lexicographical_compare(_ExecutionPolicy&& __exec, 
_ForwardIterator1 __first1, _ { return oneapi::dpl::__internal::__pattern_lexicographical_compare( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -1250,7 +1226,7 @@ shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator { return oneapi::dpl::__internal::__pattern_shift_left( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -1263,7 +1239,7 @@ shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidirect { return oneapi::dpl::__internal::__pattern_shift_right( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); } diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 082856131e7..de59de54ebf 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -48,9 +48,8 @@ uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter const auto __is_parallel = 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -78,9 +77,8 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -110,9 +108,8 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -140,9 +137,8 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( __exec); - const auto 
__is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -170,8 +166,8 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_arithmetic_v<_ValueType>) { @@ -198,8 +194,8 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_arithmetic_v<_ValueType>) { @@ -233,10 +229,9 @@ destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __ ::std::conditional_t< oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>( - __exec))>; + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; #else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, 
_ForwardIterator>(__exec)); + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); #endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN constexpr _is_vector_type __is_vector; @@ -262,10 +257,9 @@ destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) ::std::conditional_t< oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>( - __exec))>; + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; #else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); #endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN constexpr _is_vector_type __is_vector; @@ -292,8 +286,8 @@ uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __fi const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (!::std::is_trivial_v<_ValueType>) { @@ -313,8 +307,8 @@ uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __ const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto 
__is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType>) { @@ -340,8 +334,8 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType>) { @@ -368,8 +362,8 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi const auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType>) { diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index f2564db3132..2a66a0e9743 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -73,8 +73,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward return oneapi::dpl::__internal::__pattern_transform_reduce( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, ::std::plus<_InputType>(), ::std::multiplies<_InputType>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -87,8 +86,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward { return oneapi::dpl::__internal::__pattern_transform_reduce( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -100,7 +98,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIt { return oneapi::dpl::__internal::__pattern_transform_reduce( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); } @@ -228,8 +226,7 @@ transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ return oneapi::dpl::__internal::__pattern_transform_scan( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, /*inclusive=*/::std::false_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -246,8 +243,7 @@ 
transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ return oneapi::dpl::__internal::__pattern_transform_scan( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -261,8 +257,7 @@ transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ return oneapi::dpl::__internal::__pattern_transform_scan( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __binary_op, /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } @@ -280,8 +275,7 @@ adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Forwa return oneapi::dpl::__internal::__pattern_adjacent_difference( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( __exec)); } From 0b181a519a2c6d79d68b237ddaf223f9f8b60d84 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: 
Mon, 29 Jan 2024 17:04:52 +0100 Subject: [PATCH 031/566] Declare __is_parallelization_preferred as constexpr without runtime params --- include/oneapi/dpl/pstl/execution_impl.h | 8 +- .../experimental/internal/for_loop_impl.h | 21 ++- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 141 +++++++----------- include/oneapi/dpl/pstl/glue_memory_impl.h | 52 +++---- include/oneapi/dpl/pstl/glue_numeric_impl.h | 20 +-- 5 files changed, 101 insertions(+), 141 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 828f703333d..27a955551f9 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -113,12 +113,10 @@ __is_vectorization_preferred() } template -auto -__is_parallelization_preferred(_ExecutionPolicy& __exec) - -> decltype(__internal::__lazy_and(__exec.__allow_parallel(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>())) +constexpr decltype(auto) +__is_parallelization_preferred() { - return __internal::__lazy_and(__exec.__allow_parallel(), + return __internal::__lazy_and(::std::decay_t<_ExecutionPolicy>::__allow_parallel(), __internal::__is_random_access_iterator_t<_IteratorTypes...>()); } diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index d4827c11ea4..9f4314f20ee 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -486,9 +486,9 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t<::std::is_integral_v<_Ip>>> template static constexpr auto - __use_parallel(_ExecutionPolicy&& __exec) -> decltype(__exec.__allow_parallel()) + __use_parallel() { - return __exec.__allow_parallel(); + return ::std::decay_t<_ExecutionPolicy>::__allow_parallel(); } }; @@ -504,12 +504,9 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> template static constexpr auto - 
__use_parallel(_ExecutionPolicy&& __exec) - -> decltype(oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec))) + __use_parallel() { - return oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec)); + return oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _Ip>(); } }; @@ -524,10 +521,10 @@ __use_vectorization() template auto -__use_parallelization(_ExecutionPolicy&& __exec) - -> decltype(__use_par_vec_helper<_Ip>::__use_parallel(::std::forward<_ExecutionPolicy>(__exec))) +__use_parallelization() + -> decltype(__use_par_vec_helper<_Ip>::template __use_parallel<_ExecutionPolicy>()) { - return __use_par_vec_helper<_Ip>::__use_parallel(::std::forward<_ExecutionPolicy>(__exec)); + return __use_par_vec_helper<_Ip>::template __use_parallel<_ExecutionPolicy>(); } // Helper functions to extract to separate a Callable object from the pack of reductions and inductions @@ -539,7 +536,7 @@ __for_loop_impl(_ExecutionPolicy&& __exec, _Ip __start, _Ip __finish, _Fp&& __f, oneapi::dpl::__internal::__pattern_for_loop( ::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __f, __stride, oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(), - oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(__exec), + oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(), ::std::get<_Is>(::std::move(__t))...); } @@ -552,7 +549,7 @@ __for_loop_n_impl(_ExecutionPolicy&& __exec, _Ip __start, _Size __n, _Fp&& __f, oneapi::dpl::__internal::__pattern_for_loop_n( ::std::forward<_ExecutionPolicy>(__exec), __start, __n, __f, __stride, oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(), - oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(__exec), + oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(), 
::std::get<_Is>(::std::move(__t))...); } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index ac1ff531bce..17c5e331710 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -46,7 +46,7 @@ any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __l return oneapi::dpl::__internal::__pattern_any_of( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } // [alg.all_of] @@ -88,7 +88,7 @@ for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Func return oneapi::dpl::__internal::__pattern_walk1_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __f, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } // [alg.find] @@ -132,8 +132,7 @@ find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 return oneapi::dpl::__internal::__pattern_find_end( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -154,8 +153,7 @@ 
find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIter return oneapi::dpl::__internal::__pattern_find_first_of( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -175,7 +173,7 @@ adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardItera typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; return oneapi::dpl::__internal::__pattern_adjacent_find( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, ::std::equal_to<_ValueType>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__first_semantic()); } @@ -186,7 +184,7 @@ adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardItera { return oneapi::dpl::__internal::__pattern_adjacent_find( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__first_semantic()); } @@ -205,7 +203,7 @@ count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __la ::std::forward<_ExecutionPolicy>(__exec), __first, __last, 
oneapi::dpl::__internal::__equal_value>( __value), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } @@ -216,7 +214,7 @@ count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator _ { return oneapi::dpl::__internal::__pattern_count( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } @@ -230,8 +228,7 @@ search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 _ return oneapi::dpl::__internal::__pattern_search( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -251,7 +248,7 @@ search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator _ return oneapi::dpl::__internal::__pattern_search_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __count, __value, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, 
_ForwardIterator>()); } template @@ -272,8 +269,7 @@ copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __l return oneapi::dpl::__internal::__pattern_walk2_brick( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -285,8 +281,7 @@ copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _Forward return oneapi::dpl::__internal::__pattern_walk2_brick_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -297,8 +292,7 @@ copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 return oneapi::dpl::__internal::__pattern_copy_if( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } // [alg.swap] @@ -317,8 +311,7 @@ swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardItera swap(__x, __y); }, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } // [alg.transform] @@ -362,8 +355,7 @@ transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardItera oneapi::dpl::__internal::__transform_if_unary_functor<_UnaryOperation, _UnaryPredicate>(::std::move(__op), ::std::move(__pred)), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template (::std::move(__op), ::std::move(__pred)), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator3>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3>()); } // [alg.replace] @@ -419,8 +410,7 @@ replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIt oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>>( __new_value, __pred), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -443,7 +433,7 @@ fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, 
_ForwardIterator __las { oneapi::dpl::__internal::__pattern_fill( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } @@ -456,7 +446,7 @@ fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const return oneapi::dpl::__internal::__pattern_fill_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } @@ -467,7 +457,7 @@ generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator _ { oneapi::dpl::__internal::__pattern_generate( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } @@ -480,7 +470,7 @@ generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _ return oneapi::dpl::__internal::__pattern_generate_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); } @@ -515,7 +505,7 @@ 
remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator return oneapi::dpl::__internal::__pattern_remove_if( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -537,7 +527,7 @@ unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __l return oneapi::dpl::__internal::__pattern_unique( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -556,8 +546,7 @@ unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterat return oneapi::dpl::__internal::__pattern_unique_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -577,7 +566,7 @@ reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidirectiona oneapi::dpl::__internal::__pattern_reverse( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); } template @@ -588,8 +577,7 @@ reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidirec return oneapi::dpl::__internal::__pattern_reverse_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator, - _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>()); } // [alg.rotate] @@ -601,7 +589,7 @@ rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __m return oneapi::dpl::__internal::__pattern_rotate( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -612,8 +600,7 @@ rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterat return oneapi::dpl::__internal::__pattern_rotate_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __result, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } // [alg.partitions] @@ -625,7 +612,7 @@ 
is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIter return oneapi::dpl::__internal::__pattern_is_partitioned( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -635,7 +622,7 @@ partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator return oneapi::dpl::__internal::__pattern_partition( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -646,7 +633,7 @@ stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bid return oneapi::dpl::__internal::__pattern_stable_partition( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); } template (__exec), __first, __last, __out_true, __out_false, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, - _ForwardIterator2>(__exec)); + 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, _ForwardIterator2>()); } // [alg.sort] @@ -673,7 +659,7 @@ sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIter oneapi::dpl::__internal::__pattern_sort( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), typename ::std::is_move_constructible<_InputType>::type()); } @@ -694,7 +680,7 @@ stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAcc oneapi::dpl::__internal::__pattern_stable_sort( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); } template @@ -716,8 +702,7 @@ sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, _Ran oneapi::dpl::__internal::__pattern_sort_by_key( ::std::forward<_ExecutionPolicy>(__exec), __keys_first, __keys_last, __values_first, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>()); } template @@ -740,8 +725,7 @@ mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, 
_ForwardIterator return oneapi::dpl::__internal::__pattern_mismatch( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __pred, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -785,7 +769,7 @@ equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 _ return oneapi::dpl::__internal::__pattern_equal( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __p, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>()); } template @@ -804,7 +788,7 @@ equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 _ return oneapi::dpl::__internal::__pattern_equal( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __p, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>()); } template @@ -826,8 +810,7 @@ move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __l return oneapi::dpl::__internal::__pattern_walk2_brick( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, oneapi::dpl::__internal::__brick_move<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, 
_ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } // [partial.sort] @@ -840,7 +823,7 @@ partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAc oneapi::dpl::__internal::__pattern_partial_sort( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); } template @@ -862,8 +845,7 @@ partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI return oneapi::dpl::__internal::__pattern_partial_sort_copy( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, - _RandomAccessIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _RandomAccessIterator>()); } template @@ -883,7 +865,7 @@ is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte const _ForwardIterator __res = oneapi::dpl::__internal::__pattern_adjacent_find( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__first_semantic()); return __res == 
__last ? __last : oneapi::dpl::__internal::__pstl_next(__res); @@ -904,7 +886,7 @@ is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator return oneapi::dpl::__internal::__pattern_adjacent_find( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__or_semantic()) == __last; } @@ -927,8 +909,7 @@ merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 _ return oneapi::dpl::__internal::__pattern_merge( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); } template @@ -948,7 +929,7 @@ inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidire oneapi::dpl::__internal::__pattern_inplace_merge( ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); } template @@ -970,8 +951,7 @@ includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, 
_ForwardIterator return oneapi::dpl::__internal::__pattern_includes( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -994,8 +974,7 @@ set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterato return oneapi::dpl::__internal::__pattern_set_union( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); } template @@ -1018,8 +997,7 @@ set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward return oneapi::dpl::__internal::__pattern_set_intersection( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); } template @@ -1042,8 +1020,7 @@ set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIt return 
oneapi::dpl::__internal::__pattern_set_difference( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); } template @@ -1067,8 +1044,7 @@ set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, return oneapi::dpl::__internal::__pattern_set_symmetric_difference( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); } template @@ -1088,7 +1064,7 @@ is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomA return oneapi::dpl::__internal::__pattern_is_heap_until( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); } template @@ -1106,7 +1082,7 @@ is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessI return oneapi::dpl::__internal::__pattern_is_heap( 
::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); } template @@ -1126,7 +1102,7 @@ min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterato return oneapi::dpl::__internal::__pattern_min_element( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -1161,7 +1137,7 @@ minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIter return oneapi::dpl::__internal::__pattern_minmax_element( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } template @@ -1182,7 +1158,7 @@ nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAcc oneapi::dpl::__internal::__pattern_nth_element( ::std::forward<_ExecutionPolicy>(__exec), __first, __nth, __last, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); } template 
@@ -1204,8 +1180,7 @@ lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ return oneapi::dpl::__internal::__pattern_lexicographical_compare( ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -1227,7 +1202,7 @@ shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator return oneapi::dpl::__internal::__pattern_shift_left( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } // [shift.right] @@ -1240,7 +1215,7 @@ shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _Bidirect return oneapi::dpl::__internal::__pattern_shift_right( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); } } // namespace dpl diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index de59de54ebf..6bdc66d510b 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -45,9 +45,8 @@ 
uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); @@ -74,9 +73,8 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); @@ -105,9 +103,8 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, 
_InputIterator, _ForwardIterator>(); @@ -134,9 +131,8 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); @@ -164,8 +160,8 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); @@ -192,8 +188,8 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); @@ -222,8 
+218,8 @@ destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); using _is_vector_type = #if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) ::std::conditional_t< @@ -250,8 +246,8 @@ destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); using _is_vector_type = #if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) ::std::conditional_t< @@ -284,8 +280,8 @@ uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); @@ -305,8 +301,8 @@ 
uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); @@ -332,8 +328,8 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); @@ -360,8 +356,8 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); constexpr auto __is_vector = oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index 
2a66a0e9743..9c450f79a2b 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -74,8 +74,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, ::std::plus<_InputType>(), ::std::multiplies<_InputType>(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template (__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template @@ -99,7 +97,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIt return oneapi::dpl::__internal::__pattern_transform_reduce( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); } // [exclusive.scan] @@ -227,8 +225,7 @@ transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, /*inclusive=*/::std::false_type(), 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } // [transform.inclusive.scan] @@ -244,8 +241,7 @@ transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, /*inclusive=*/::std::true_type(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template (__exec), __first, __last, __result, __unary_op, __binary_op, /*inclusive=*/::std::true_type(), oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } // [adjacent.difference] @@ -276,8 +271,7 @@ adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Forwa return oneapi::dpl::__internal::__pattern_adjacent_difference( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op, oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); } template From 382a14055756869a1586e22925512e0f5ec41a20 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 29 Jan 2024 19:49:39 +0100 Subject: [PATCH 032/566] include/oneapi/dpl/pstl/glue_memory_impl.h - move constexpr auto __is_vector under constexpr condition check --- include/oneapi/dpl/pstl/glue_memory_impl.h | 105 +++++++++++---------- 1 file changed, 57 insertions(+), 48 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 6bdc66d510b..e810e20bf9a 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -47,8 +47,6 @@ uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -58,6 +56,9 @@ uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk2( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, @@ -75,8 +76,6 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - constexpr auto __is_vector = - 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -86,6 +85,9 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk2_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, @@ -105,8 +107,6 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -116,6 +116,9 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk2( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, @@ -133,8 +136,6 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr 
(::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -144,6 +145,9 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk2_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, @@ -162,8 +166,6 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_arithmetic_v<_ValueType>) { @@ -174,6 +176,9 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + oneapi::dpl::__internal::__pattern_walk1( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, @@ -190,8 +195,6 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_arithmetic_v<_ValueType>) { @@ -202,6 +205,9 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, 
_ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk1_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, @@ -220,19 +226,20 @@ destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __ constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - using _is_vector_type = + + if constexpr (!::std::is_trivially_destructible_v<_ValueType>) + { + using _is_vector_type = #if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; + ::std::conditional_t< + oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, + ::std::false_type, + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; #else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); #endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; + constexpr _is_vector_type __is_vector; - if constexpr (!::std::is_trivially_destructible_v<_ValueType>) - { oneapi::dpl::__internal::__pattern_walk1( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [](_ReferenceType __val) { __val.~_ValueType(); }, __is_vector, __is_parallel); @@ -246,25 +253,25 @@ destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) typedef typename ::std::iterator_traits<_ForwardIterator>::value_type 
_ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - using _is_vector_type = -#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; -#else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); -#endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; - if constexpr (::std::is_trivially_destructible_v<_ValueType>) { return oneapi::dpl::__internal::__pstl_next(__first, __n); } else { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); + using _is_vector_type = +#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) + ::std::conditional_t< + oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, + ::std::false_type, + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; +#else + decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); +#endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN + constexpr _is_vector_type __is_vector; + return oneapi::dpl::__internal::__pattern_walk1_n(::std::forward<_ExecutionPolicy>(__exec), __first, __n, [](_ReferenceType __val) { __val.~_ValueType(); }, __is_vector, __is_parallel); @@ -280,13 +287,13 @@ 
uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); - if constexpr (!::std::is_trivial_v<_ValueType>) { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + oneapi::dpl::__internal::__pattern_walk1( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, @@ -301,17 +308,17 @@ uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); - if constexpr (::std::is_trivial_v<_ValueType>) { return oneapi::dpl::__internal::__pstl_next(__first, __n); } else { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk1_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, 
oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, @@ -330,8 +337,6 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType>) { @@ -342,6 +347,9 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + oneapi::dpl::__internal::__pattern_walk1( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, @@ -358,8 +366,6 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType>) { @@ -370,6 +376,9 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi } else { + constexpr auto __is_vector = + oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk1_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, From 2ef6ba0edef1557f608064277306fbcfd0a37c04 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 17:46:31 +0100 Subject: [PATCH 033/566] Implement __select_backend function as constexpr 
without params --- include/oneapi/dpl/pstl/execution_impl.h | 32 ++++++++++++------- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 5 +-- .../pstl/hetero/dpcpp/execution_sycl_defs.h | 29 +++++++++++------ 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 27a955551f9..5be0170150d 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -156,30 +156,38 @@ using __tag_type = ::std::conditional_t< __internal::__is_random_access_iterator_v<_IteratorTypes...>, __parallel_tag<_IsVector>, ::std::conditional_t<__is_forward_iterator_v<_IteratorTypes...>, __parallel_forward_tag, __serial_tag<_IsVector>>>; -template -__serial_tag -__select_backend(oneapi::dpl::execution::sequenced_policy, _IteratorTypes&&...) +template +constexpr ::std::enable_if_t< + ::std::is_same_v<::std::decay_t<_ExecutionPolicy>, oneapi::dpl::execution::sequenced_policy>, + __serial_tag> +__select_backend() { return {}; } -template -__serial_tag<__internal::__is_random_access_iterator<_IteratorTypes...>> -__select_backend(oneapi::dpl::execution::unsequenced_policy, _IteratorTypes&&...) +template +constexpr ::std::enable_if_t< + ::std::is_same_v<::std::decay_t<_ExecutionPolicy>, oneapi::dpl::execution::unsequenced_policy>, + __serial_tag<__internal::__is_random_access_iterator<_IteratorTypes...>>> +__select_backend() { return {}; } -template -__tag_type -__select_backend(oneapi::dpl::execution::parallel_policy, _IteratorTypes&&...) +template +constexpr ::std::enable_if_t< + ::std::is_same_v<::std::decay_t<_ExecutionPolicy>, oneapi::dpl::execution::parallel_policy>, + __tag_type> +__select_backend() { return {}; } -template -__tag_type<__internal::__is_random_access_iterator<_IteratorTypes...>, _IteratorTypes...> -__select_backend(oneapi::dpl::execution::parallel_unsequenced_policy, _IteratorTypes&&...) 
+template +constexpr ::std::enable_if_t< + ::std::is_same_v<::std::decay_t<_ExecutionPolicy>, oneapi::dpl::execution::parallel_unsequenced_policy>, + __tag_type<__internal::__is_random_access_iterator<_IteratorTypes...>, _IteratorTypes...>> +__select_backend() { return {}; } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 17c5e331710..c6b0956cb50 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -97,7 +97,7 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, _ForwardIterator{}); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); return oneapi::dpl::__internal::__pattern_find_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); @@ -379,7 +379,8 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { - auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, _ForwardIterator{}); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + __pattern_replace_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, __new_value); } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index 8c91c597c31..9f72220225a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -322,26 +322,35 @@ struct 
__device_backend_tag { }; -template -::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__device_backend_tag>> -__select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...) +template +struct __select_backend_for_hetero_policy_trait; + +template +struct __select_backend_for_hetero_policy_trait> { - return {}; -} + using __backend_tag = __device_backend_tag; +}; #if _ONEDPL_FPGA_DEVICE struct __fpga_backend_tag : __device_backend_tag { }; -template -::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__fpga_backend_tag>> -__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...) +template +struct __select_backend_for_hetero_policy_trait> { - return {}; -} + using __backend_tag = __fpga_backend_tag; +}; #endif +template +constexpr ::std::enable_if_t< + __is_random_access_iterator_v<_IteratorTypes...>, + __hetero_tag>::__backend_tag>> +__select_backend() +{ + return {}; // return __hetero_tag<__device_backend_tag> or __hetero_tag<__fpga_backend_tag> +} template struct __is_backend_tag; From a3dfcc00ec22d36d18a8f2ec85eda18fe5ba3f13 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 30 Jan 2024 12:56:51 +0100 Subject: [PATCH 034/566] Replace __internal::__lazy_and to ::std::conjunction_v; remove __internal::__lazy_and and __internal::__lazy_or as not required anymore --- include/oneapi/dpl/pstl/execution_impl.h | 40 ++++-------------------- 1 file changed, 6 insertions(+), 34 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 5be0170150d..ad29a81b0c5 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -29,34 +29,6 @@ namespace dpl namespace __internal { -/* predicate */ - -template -::std::false_type __lazy_and(_Tp, ::std::false_type) -{ - return ::std::false_type{}; -} - -template -inline _Tp -__lazy_and(_Tp __a, ::std::true_type) 
-{ - return __a; -} - -template -::std::true_type __lazy_or(_Tp, ::std::true_type) -{ - return ::std::true_type{}; -} - -template -inline _Tp -__lazy_or(_Tp __a, ::std::false_type) -{ - return __a; -} - /* policy */ template struct __policy_traits @@ -105,19 +77,19 @@ template using __allow_parallel = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_parallel; template -constexpr decltype(auto) +constexpr auto __is_vectorization_preferred() { - return __internal::__lazy_and(::std::decay_t<_ExecutionPolicy>::__allow_vector(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>()); + return ::std::conjunction::__allow_vector()), + __internal::__is_random_access_iterator_t<_IteratorTypes...>>(); } template -constexpr decltype(auto) +constexpr auto __is_parallelization_preferred() { - return __internal::__lazy_and(::std::decay_t<_ExecutionPolicy>::__allow_parallel(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>()); + return ::std::conjunction::__allow_parallel()), + __internal::__is_random_access_iterator_t<_IteratorTypes...>>(); } //------------------------------------------------------------------------ From cb79db2abec30ea77fb14c32e9ed26b30cace26c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 31 Jan 2024 15:57:58 +0100 Subject: [PATCH 035/566] include/oneapi/dpl/pstl/algorithm_impl.h - formatting --- include/oneapi/dpl/pstl/algorithm_impl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 7b02f71722d..5bd488a0580 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -753,6 +753,7 @@ __pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _F _ForwardIterator __last, _Predicate __pred) { using __backend_tag = typename decltype(__tag)::__backend_tag; + return __except_handler([&]() { return __parallel_find( __backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), __first, __last, From f075fe83bce8226b5f8968a0f20f9b93b5108050 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 17:03:41 +0100 Subject: [PATCH 036/566] Add forward declarations for struct __parallel_tag, struct __parallel_forward_tag --- include/oneapi/dpl/pstl/algorithm_fwd.h | 5 +++++ include/oneapi/dpl/pstl/numeric_fwd.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 34d09140fb0..97adcbe7d85 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -26,6 +26,11 @@ namespace dpl namespace __internal { +template +struct __parallel_tag; + +struct __parallel_forward_tag; + //------------------------------------------------------------------------ // any_of //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index c663cd5c2d4..aab7edc0150 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -25,6 +25,8 @@ namespace dpl { namespace __internal { +template +struct __parallel_tag; //------------------------------------------------------------------------ // transform_reduce (version with two binary functions, according to draft N4659) From c1d389f62e27db110f58e4e24932fc07d7fec77d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 10:46:07 +0100 Subject: [PATCH 037/566] Implement tag dispatching prototype : __pattern_replace_if + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 5bd488a0580..66e2a0c3231 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -178,7 +178,7 @@ 
__pattern_walk1(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _For } template -oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +void __pattern_replace_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { From d4c811406baee65f4554f3e2f6007f734bb398c7 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 11:48:30 +0100 Subject: [PATCH 038/566] Implement tag dispatching prototype : __parallel_for + tag impls (omp, serial) --- include/oneapi/dpl/pstl/omp/parallel_for.h | 19 +++++++++++++++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 7 +++++++ 2 files changed, 26 insertions(+) diff --git a/include/oneapi/dpl/pstl/omp/parallel_for.h b/include/oneapi/dpl/pstl/omp/parallel_for.h index 5b6ed66453a..296b24e282b 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for.h @@ -66,6 +66,25 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) } } +template +void +__parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +{ + if (omp_in_parallel()) + { + // we don't create a nested parallel region in an existing parallel + // region: just create tasks + oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); + } + else + { + // in any case (nested or non-nested) one parallel region is created and + // only one thread creates a set of tasks + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); } + } +} + } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index a2dd6468a34..a80f36734ed 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ 
b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -71,6 +71,13 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) __f(__first, __last); } +template +void +__parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +{ + __f(__first, __last); +} + template _Value __parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, From f2962bcbe31262091aa8193a09311ac5a300ed22 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 11:52:17 +0100 Subject: [PATCH 039/566] Implement tag dispatching prototype : __parallel_for_each + tag impls (omp, serial) --- .../oneapi/dpl/pstl/omp/parallel_for_each.h | 20 +++++++++++++++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 9 +++++++++ 2 files changed, 29 insertions(+) diff --git a/include/oneapi/dpl/pstl/omp/parallel_for_each.h b/include/oneapi/dpl/pstl/omp/parallel_for_each.h index 7877ef095ef..510cd04c352 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for_each.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for_each.h @@ -61,6 +61,26 @@ __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterat } } +template +void +__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __first, + _ForwardIterator __last, _Fp __f) +{ + if (omp_in_parallel()) + { + // we don't create a nested parallel region in an existing parallel + // region: just create tasks + oneapi::dpl::__omp_backend::__parallel_for_each_body(__first, __last, __f); + } + else + { + // in any case (nested or non-nested) one parallel region is created and + // only one thread creates a set of tasks + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_each_body(__first, __last, __f); } + } +} + } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git 
a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index a80f36734ed..577fcbd36a0 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -155,6 +155,15 @@ __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterat __f(*__iter); } +template +void +__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, + _ForwardIterator __end, _Fp __f) +{ + for (auto __iter = __begin; __iter != __end; ++__iter) + __f(*__iter); +} + } // namespace __serial_backend } // namespace dpl } // namespace oneapi From 5cb677f96a771533374cae3405b90de15e90a56c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 31 Jan 2024 15:50:11 +0100 Subject: [PATCH 040/566] __pattern_walk1 + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 14 ++++++++++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 13 +++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 97adcbe7d85..22cd38af956 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -74,6 +74,10 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_walk1(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function, _IsVector, /*parallel=*/::std::false_type) noexcept; +template +void +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> @@ -81,6 +85,11 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando _IsVector __is_vector, /*parallel=*/::std::true_type); +template +void +__pattern_walk1(__parallel_forward_tag, 
_ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Function __f); + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator>> @@ -88,6 +97,11 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando _IsVector __is_vector, /*parallel=*/::std::true_type); +template +void +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f); + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_walk_brick(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick, diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 66e2a0c3231..0b1a538378e 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -138,6 +138,8 @@ template ); + __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{}); } @@ -155,20 +157,23 @@ void __pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; auto __func = [&__f](_ReferenceType arg) { __f(arg); }; __internal::__except_handler([&]() { - __par_backend::__parallel_for_each(__parallel_forward_tag::__backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __func); }); } template void -__pattern_walk1(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, 
_ForwardIterator __last, _Function __f) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__f](_ForwardIterator __i, _ForwardIterator __j) { From 1fbeea54ca8441c17dfe65820f07a8d206e41ded Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 1 Feb 2024 18:19:04 +0100 Subject: [PATCH 041/566] __pattern_walk1 + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 6 +- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 +- include/oneapi/dpl/pstl/glue_memory_impl.h | 78 ++++++++++--------- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 29 ++++--- 4 files changed, 70 insertions(+), 51 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 0b1a538378e..4a4396be8ee 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -280,8 +280,10 @@ __pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Siz _IsVector __is_vector, /*is_parallel=*/::std::true_type) { - oneapi::dpl::__internal::__pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f, - __is_vector, ::std::true_type()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + + oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __first + __n, __f); return __first + __n; } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index c6b0956cb50..dd7eec8fcd2 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -75,10 +75,10 @@ template 
oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) { - oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - __exec.__allow_parallel()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __f); } template diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index e810e20bf9a..667a129825b 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -164,11 +164,11 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - if constexpr (::std::is_arithmetic_v<_ValueType>) { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); + oneapi::dpl::__internal::__pattern_walk_brick( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, @@ -176,13 +176,12 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __dispatch_tag = 
oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); oneapi::dpl::__internal::__pattern_walk1( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, - __is_parallel); + oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); } } @@ -215,6 +214,26 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size } } +#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) +template +struct EvaDestroylWorkaroundPolicy +{ + using ExecutionPolicy = _ExecutionPolicy; +}; + +template <> +struct EvaDestroylWorkaroundPolicy +{ + using ExecutionPolicy = oneapi::dpl::execution::parallel_policy; +}; + +template <> +struct EvaDestroylWorkaroundPolicy +{ + using ExecutionPolicy = oneapi::dpl::execution::sequenced_policy; +}; +#endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN + // [specialized.destroy] template @@ -224,25 +243,20 @@ destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - if constexpr (!::std::is_trivially_destructible_v<_ValueType>) { - using _is_vector_type = + using _ExecutionPolicyDest = #if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; + typename 
EvaDestroylWorkaroundPolicy<::std::decay_t<_ExecutionPolicy>>::ExecutionPolicy; #else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + _ExecutionPolicy; #endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; - oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [](_ReferenceType __val) { __val.~_ValueType(); }, __is_vector, __is_parallel); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicyDest, _ForwardIterator>(); + + oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, [](_ReferenceType __val) { __val.~_ValueType(); }); } } @@ -289,15 +303,11 @@ uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __fi if constexpr (!::std::is_trivial_v<_ValueType>) { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}); } } @@ -335,11 +345,11 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> 
_DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - if constexpr (::std::is_trivial_v<_ValueType>) { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); + oneapi::dpl::__internal::__pattern_walk_brick( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType()}, @@ -347,13 +357,11 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); } } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 244980ecff8..dac24712d4d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -85,8 +85,9 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ __pattern_walk1_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f, - /*vector=*/::std::true_type(), /*parallel=*/::std::true_type()); + constexpr auto 
__dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + __pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); return __first + __n; } @@ -201,10 +202,12 @@ __pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa if (__last - __first <= 0) return; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + __pattern_walk1( + __dispatch_tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __last, __f, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + __first, __last, __f); } template @@ -217,10 +220,12 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + __pattern_walk1( + __dispatch_tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __first + __n, __f, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + __first, __first + __n, __f); return __first + __n; } @@ -332,10 +337,12 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ __pattern_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + __pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), 
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - fill_functor<_T>{__value}, ::std::true_type{}, ::std::true_type{}); + fill_functor<_T>{__value}); return __last; } @@ -361,10 +368,12 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ __pattern_generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + __pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - generate_functor<_Generator>{__g}, ::std::true_type{}, ::std::true_type{}); + generate_functor<_Generator>{__g}); return __last; } From cbb14735ff79a4dbd3e1d1ac033124a9cd30bc98 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 12:26:30 +0100 Subject: [PATCH 042/566] __pattern_walk2 + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 15 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 53 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 28 ++++++++++ 3 files changed, 96 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 22cd38af956..bc9d87b767a 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -171,6 +171,10 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, _IsVector, 
/*parallel=*/::std::false_type) noexcept; +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -179,12 +183,23 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< __pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Function); + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); +template +_ForwardIterator2 +__pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Function); + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 4a4396be8ee..782a0763047 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -361,6 +361,16 @@ __pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator return __internal::__brick_walk2(__first1, __last1, __first2, __f, __is_vector); } +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function 
__f) noexcept +{ + static_assert(__is_backend_tag_serial_v<_Tag>); + + return __internal::__brick_walk2(__first1, __last1, __first2, __f, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -379,6 +389,21 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran }); } +template +_RandomAccessIterator2 +__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f) +{ + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, _IsVector{}); + }); + return __first2 + (__last1 - __first1); + }); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> @@ -406,6 +431,34 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI }); } +template +_ForwardIterator2 +__pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) +{ + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + + return __internal::__except_handler([&]() { + using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; + auto __begin = _iterator_tuple(__first1, __first2); + auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2()); + + typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; + typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; + + 
__par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + [&__f](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { + __f(::std::get<0>(__val), ::std::get<1>(__val)); + }); + + //TODO: parallel_for_each does not allow to return correct iterator value according to the ::std::transform + // implementation. Therefore, iterator value is calculated separately. + for (; __begin != __end; ++__begin) + ; + return ::std::get<1>(__begin.base()); + }); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index dac24712d4d..7c9782e0acf 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -126,6 +126,34 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI return __first2 + __n; } +template +_ForwardIterator2 +__pattern_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) +{ + auto __n = __last1 - __first1; + if (__n <= 0) + return __first2; + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode1, _ForwardIterator1>(); + auto __buf1 = __keep1(__first1, __last1); + + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); + auto __buf2 = __keep2(__first2, __first2 + __n); + + auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( + ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, + __buf1.all_view(), __buf2.all_view()); + + if constexpr (_IsSync()) + __future_obj.wait(); + + return __first2 + __n; +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> From 
9ed3e7f0ed4a1bac5131eb5b24727659d179393c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 1 Feb 2024 09:58:00 +0100 Subject: [PATCH 043/566] __pattern_walk2 + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 6 ++- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 15 +++--- include/oneapi/dpl/pstl/glue_memory_impl.h | 28 +++++----- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 54 +++++++++++++------ 4 files changed, 62 insertions(+), 41 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 782a0763047..b1f7c2c01ae 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -474,8 +474,10 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) { - return __internal::__pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, - __f, __is_vector, ::std::true_type()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + + return __internal::__pattern_walk2(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __first1 + __n, __first2, __f); } template diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index dd7eec8fcd2..905708d9fe9 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -321,11 +321,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op) { + constexpr auto __dispatch_tag = 
oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_walk2( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - __exec.__allow_parallel()); + oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); } // we can't use non-const __op here @@ -402,16 +403,16 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__replace_copy_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, ::std::conditional_t, _UnaryPredicate, oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>>( - __new_value, __pred), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + __new_value, __pred)); } template diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 667a129825b..4d68973c814 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -45,24 +45,22 @@ 
uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk2_brick( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}); } } @@ -105,24 +103,22 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { + constexpr 
auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk2_brick( ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}); } } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 7c9782e0acf..d9a0e37908b 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -174,10 +174,15 @@ __pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIt _ForwardIterator2 __first2, _Function __f, /*is_vector=*/::std::true_type, /*is_parallel=*/::std::true_type) { - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), - __first1, __last1, __first2, __f, - ::std::true_type(), ::std::true_type()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + return __pattern_walk2<__backend_tag, /*_IsSync=*/::std::true_type, + __par_backend_hetero::access_mode::read_write, 
+ __par_backend_hetero::access_mode::read_write>( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); } //------------------------------------------------------------------------ @@ -311,14 +316,18 @@ __pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. - return __pattern_walk2( __par_backend_hetero::make_wrapped_policy<__walk2_transform_if_wrapper>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __func, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __first2, __func); } template @@ -1098,11 +1107,17 @@ __pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, auto __copy_last = __pattern_unique_copy(__exec, __first, __last, __copy_first, __pred, /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), + decltype(__first)>(); + using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer - return __pattern_walk2( + __dispatch_tag1, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}); } //------------------------------------------------------------------------ @@ -1363,14 +1378,21 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterat auto true_count = copy_result.first - __true_result; //TODO: optimize copy back if possible (inplace, decrease number of submits) - __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(__exec), - __true_result, copy_result.first, __first, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__true_result), + decltype(copy_result.first), decltype(__first)>(); + using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( + __dispatch_tag1, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, + copy_result.first, __first, __brick_move<_ExecutionPolicy>{}); + + constexpr auto __dispatch_tag2 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__false_result), + decltype(copy_result.second), decltype(__first + true_count)>(); __pattern_walk2( + __dispatch_tag2, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __false_result, copy_result.second, __first + true_count, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + __false_result, copy_result.second, __first + true_count, __brick_move<_ExecutionPolicy>{}); return __first + true_count; } From 06a7cefdb7235468f08be6731268fc345312f15d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko 
<> Date: Thu, 1 Feb 2024 11:58:52 +0100 Subject: [PATCH 044/566] __pattern_walk3 + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 18 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 59 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 29 +++++++++ 3 files changed, 106 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index bc9d87b767a..24db58ed68d 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -263,6 +263,12 @@ __pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _Forwa _Function, _IsVector, /*parallel=*/::std::false_type) noexcept; +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, + _Function) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -273,6 +279,12 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator3, _Function); + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -283,6 +295,12 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); +template +_ForwardIterator3 +__pattern_walk3(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator3, _Function); + //------------------------------------------------------------------------ // transform_if 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index b1f7c2c01ae..2bea3b3417e 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -600,6 +600,17 @@ __pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, __is_vector); } +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) noexcept +{ + static_assert(__is_backend_tag_serial_v<_Tag>); + + return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -621,6 +632,23 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran }); } +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f) +{ + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2, __first3](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, + _IsVector{}); + }); + return __first3 + (__last1 - __first1); + }); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -653,6 +681,37 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI }); } +template +_ForwardIterator3 +__pattern_walk3(__parallel_forward_tag, 
_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) +{ + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + + return __internal::__except_handler([&]() { + using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2, _ForwardIterator3>; + auto __begin = _iterator_tuple(__first1, __first2, __first3); + auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2(), + /*dummy parameter*/ _ForwardIterator3()); + + typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; + typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; + typedef typename ::std::iterator_traits<_ForwardIterator3>::reference _ReferenceType3; + + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + [&](::std::tuple<_ReferenceType1, _ReferenceType2, _ReferenceType3> __val) { + __f(::std::get<0>(__val), ::std::get<1>(__val), ::std::get<2>(__val)); + }); + + //TODO: parallel_for_each does not allow to return correct iterator value according to the ::std::transform + // implementation. Therefore, iterator value is calculated separately. 
+ for (; __begin != __end; ++__begin) + ; + return ::std::get<2>(__begin.base()); + }); +} + //------------------------------------------------------------------------ // transform_if //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index d9a0e37908b..9fb0b1c9bf7 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -218,6 +218,35 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI return __first3 + __n; } +template +_ForwardIterator3 +__pattern_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) +{ + auto __n = __last1 - __first1; + if (__n <= 0) + return __first3; + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode1, _ForwardIterator1>(); + auto __buf1 = __keep1(__first1, __last1); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); + auto __buf2 = __keep2(__first2, __first2 + __n); + auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode3, _ForwardIterator3>(); + auto __buf3 = __keep3(__first3, __first3 + __n); + + oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, + __buf1.all_view(), __buf2.all_view(), __buf3.all_view()) + .wait(); + + return __first3 + __n; +} + //------------------------------------------------------------------------ // walk_brick, walk_brick_n //------------------------------------------------------------------------ From 0b8b028ff27623929b15600c383563ba99ebd332 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 1 Feb 2024 12:33:58 +0100 Subject: [PATCH 045/566] 
__pattern_walk3 + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 905708d9fe9..207967a3a3a 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -336,11 +336,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator __result, _BinaryOperation __op) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk3( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, - oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - __exec.__allow_parallel()); + oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); } // [alg.transform_if] From d3c200ab9a30c81c7b79d0a3d1a6cc88527d33f1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 1 Feb 2024 16:16:17 +0100 Subject: [PATCH 046/566] __pattern_walk1_n + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 14 ++++++++++---- include/oneapi/dpl/pstl/algorithm_impl.h | 19 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 14 ++++++++++++-- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 24db58ed68d..7903e0fadc3 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -87,8 +87,7 @@ 
__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando template void -__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, - _RandomAccessIterator __last, _Function __f); +__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Function); template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -99,8 +98,7 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando template void -__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _Function __f); +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function); template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> @@ -129,11 +127,19 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Function, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_walk1_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function); + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> __pattern_walk_brick_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Brick, diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 2bea3b3417e..3e20b86469b 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ 
b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -274,6 +274,15 @@ __pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Func return __internal::__brick_walk1_n(__first, __n, __f, __is_vector); } +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_walk1_n(__first, __n, __f, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Function __f, @@ -287,6 +296,16 @@ __pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Siz return __first + __n; } +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, + _Function __f) +{ + oneapi::dpl::__internal::__pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __first + __n, __f); + return __first + __n; +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick, diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 9fb0b1c9bf7..19be5cd634f 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -60,8 +60,8 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte template void -__pattern_walk1(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, - _ForwardIterator __last, _Function __f) +__pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, 
_ForwardIterator __last, + _Function __f) { auto __n = __last - __first; if (__n <= 0) @@ -91,6 +91,16 @@ __pattern_walk1_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n return __first + __n; } +template +_ForwardIterator +__pattern_walk1_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, + _Function __f) +{ + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); + return __first + __n; +} + //------------------------------------------------------------------------ // walk2 //------------------------------------------------------------------------ From 6ba79f93f5f27191f1c27e615da843d6adbcccbe Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 1 Feb 2024 16:22:48 +0100 Subject: [PATCH 047/566] __pattern_walk1_n + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 +-- include/oneapi/dpl/pstl/glue_memory_impl.h | 59 ++++++++----------- 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 207967a3a3a..2b77e0c44b7 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -85,10 +85,10 @@ template for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f) { - return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_walk1_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __n, __f); } // [alg.find] diff --git 
a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 4d68973c814..1e450a4efa5 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -188,11 +188,11 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - if constexpr (::std::is_arithmetic_v<_ValueType>) { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk_brick_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, @@ -200,13 +200,11 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); } } @@ -269,22 +267,19 @@ destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) } else { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - using 
_is_vector_type = + using _ExecutionPolicyDest = #if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>())>; + typename EvaDestroylWorkaroundPolicy<::std::decay_t<_ExecutionPolicy>>::ExecutionPolicy; #else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + _ExecutionPolicy; #endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; - return oneapi::dpl::__internal::__pattern_walk1_n(::std::forward<_ExecutionPolicy>(__exec), __first, __n, - [](_ReferenceType __val) { __val.~_ValueType(); }, - __is_vector, __is_parallel); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicyDest, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_walk1_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __n, + [](_ReferenceType __val) { __val.~_ValueType(); }); } } @@ -320,15 +315,11 @@ uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __ } else { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, 
::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}); } } @@ -368,11 +359,11 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - if constexpr (::std::is_trivial_v<_ValueType>) { + constexpr auto __is_parallel = + oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_walk_brick_n( ::std::forward<_ExecutionPolicy>(__exec), __first, __n, oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType()}, @@ -380,13 +371,11 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); } } From 825391c27696320743eb4a32d6e8944c06659805 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 10:55:24 +0100 Subject: [PATCH 048/566] __pattern_find_if + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 14 ++++++-------- include/oneapi/dpl/pstl/algorithm_impl.h | 8 +++++--- 2 files changed, 11 
insertions(+), 11 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 7903e0fadc3..9faa0bde46d 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -381,15 +381,13 @@ template _RandomAccessIterator __brick_find_if(_RandomAccessIterator, _RandomAccessIterator, _Predicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_find_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_find_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Predicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_ForwardIterator +__pattern_find_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate); //------------------------------------------------------------------------ // find_end diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 3e20b86469b..75b2f0fa87f 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -881,18 +881,20 @@ __brick_find_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _Pr template _ForwardIterator -__pattern_find_if(_Tag __tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, +__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) noexcept { + static_assert(__is_backend_tag_v<_Tag>); + return __internal::__brick_find_if(__first, __last, __pred, typename 
_Tag::__is_vector{}); } template _ForwardIterator -__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, +__pattern_find_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; return __except_handler([&]() { return __parallel_find( From bc10cdf618a5f4f7df9cc8046039134483191002 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 09:25:13 +0100 Subject: [PATCH 049/566] __pattern_replace_if + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 75b2f0fa87f..3059748bd0d 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -187,6 +187,8 @@ void __pattern_replace_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { + static_assert(__is_backend_tag_v<_Tag>); + oneapi::dpl::__internal::__pattern_walk1( __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__replace_functor< From f3bf9acd54e9d77284feab966c99337580fc5eb8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 11:49:44 +0100 Subject: [PATCH 050/566] __pattern_walk_brick + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 24 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 14 +++++++++++ 3 files changed, 47 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 9faa0bde46d..700d8ca5541 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ 
b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -105,11 +105,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_walk_brick(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick, /*parallel=*/::std::false_type) noexcept; +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_walk_brick(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Brick, /*parallel=*/::std::true_type); +template +void +__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Brick); + //------------------------------------------------------------------------ // walk1_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 3059748bd0d..5fd7d2d87cb 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -234,6 +234,16 @@ __pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa __brick(__first, __last, __is_vector); } +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Brick __brick) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + __brick(__first, __last, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_walk_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -249,6 +259,20 @@ __pattern_walk_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _ }); } +template +void +__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, 
_Brick __brick) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j, _IsVector{}); }); + }); +} + //------------------------------------------------------------------------ // walk1_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 19be5cd634f..dd045d3bb04 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -282,6 +282,20 @@ __pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa __first, __last, __f); } +template +void +__pattern_walk_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) +{ + if (__last - __first <= 0) + return; + + __pattern_walk1( + __tag, + __par_backend_hetero::make_wrapped_policy<__walk_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), + __first, __last, __f); +} + template struct __walk_brick_n_wrapper { From 20349f66c0dde96e447ad1f97aa502743613cf23 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 11:50:19 +0100 Subject: [PATCH 051/566] __pattern_walk_brick + tag calls --- include/oneapi/dpl/pstl/glue_memory_impl.h | 23 ++++++++-------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 1e450a4efa5..1c6a3bdd6ab 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -160,20 +160,17 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, 
_Forward typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + if constexpr (::std::is_arithmetic_v<_ValueType>) { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - oneapi::dpl::__internal::__pattern_walk_brick( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, - __is_parallel); + oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}); } else { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - oneapi::dpl::__internal::__pattern_walk1( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, @@ -332,20 +329,16 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + if constexpr (::std::is_trivial_v<_ValueType>) { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - oneapi::dpl::__internal::__pattern_walk_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType()}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType()}); } else { - constexpr 
auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - oneapi::dpl::__internal::__pattern_walk1( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); From 674dc00c5df805f4f16dfee8e0affb0819cdca7b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 12:38:31 +0100 Subject: [PATCH 052/566] __pattern_walk2_n + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 ++++++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 21 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 9 ++++++++ 3 files changed, 41 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 700d8ca5541..6d42e90d927 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -221,12 +221,23 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function, _IsVector, /*parallel=*/::std::false_type) noexcept; +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> __pattern_walk2_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Function, _IsVector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, + _Function); + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> __pattern_walk2_brick(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Brick, diff --git 
a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 5fd7d2d87cb..12479e8c9fd 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -513,6 +513,17 @@ __pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _Fo return __internal::__brick_walk2_n(__first1, __n, __first2, __f, __is_vector); } +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, + _Function __f) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_walk2_n(__first1, __n, __first2, __f, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> @@ -525,6 +536,16 @@ __pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _S __first1 + __n, __first2, __f); } +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Function __f) +{ + return __internal::__pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __first2, __f); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index dd045d3bb04..69eba6177fc 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -174,6 +174,15 @@ __pattern_walk2_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size _ ::std::true_type(), ::std::true_type()); } +template +_ForwardIterator2 
+__pattern_walk2_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, + _ForwardIterator2 __first2, _Function __f) +{ + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f); +} + //------------------------------------------------------------------------ // swap //------------------------------------------------------------------------ From 4338b96ce996eefb94fe22de840f2d6efb960b59 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 12:39:00 +0100 Subject: [PATCH 053/566] __pattern_walk2_n + tag calls --- include/oneapi/dpl/pstl/glue_memory_impl.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 1c6a3bdd6ab..cb0a86fa036 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -72,6 +72,8 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); @@ -83,13 +85,9 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk2_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + 
__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}); } } @@ -130,6 +128,8 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); @@ -141,13 +141,9 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ } else { - constexpr auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk2_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}); } } From 846f7d96458282dd57454ef27216080b32c7d8b7 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 12:54:09 +0100 Subject: [PATCH 054/566] __pattern_walk_brick_n + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 8 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 24 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 13 ++++++++++ 3 files changed, 45 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 6d42e90d927..591553e4f9b 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -154,11 +154,19 @@ 
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk_brick_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Brick, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Brick) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_walk_brick_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick); + //------------------------------------------------------------------------ // walk2 (pseudo) // diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 12479e8c9fd..90806aa392f 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -341,6 +341,15 @@ __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Siz return __brick(__first, __n, __is_vector); } +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __brick(__first, __n, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Brick __brick, @@ -356,6 +365,21 @@ __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, }); } +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, + _Brick __brick) +{ + using __backend_tag = typename 
__parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i, _IsVector{}); }); + return __first + __n; + }); +} + //------------------------------------------------------------------------ // walk2 (pseudo) // diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 69eba6177fc..a430ee845cd 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -324,6 +324,19 @@ __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Siz return __first + __n; } +template +_ForwardIterator +__pattern_walk_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, + _Function __f) +{ + __pattern_walk1( + __tag, + __par_backend_hetero::make_wrapped_policy<__walk_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), + __first, __first + __n, __f); + return __first + __n; +} + //------------------------------------------------------------------------ // walk2_brick, walk2_brick_n //------------------------------------------------------------------------ From 6139218326bfe14c185c8f80f83d1de174c753aa Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 12:54:38 +0100 Subject: [PATCH 055/566] __pattern_walk_brick_n + tag calls --- include/oneapi/dpl/pstl/glue_memory_impl.h | 24 ++++++++-------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index cb0a86fa036..72e2f6cc1ad 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -181,20 +181,16 @@ 
uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + if constexpr (::std::is_arithmetic_v<_ValueType>) { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}); } else { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk1_n( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); @@ -348,20 +344,16 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + if constexpr (::std::is_trivial_v<_ValueType>) { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__brick_fill_n<_ValueType, 
_DecayedExecutionPolicy>{_ValueType()}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType()}); } else { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk1_n( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); From 84aae3b0be50cd8568d3a3f05e0522f17a82ee77 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 16:27:57 +0100 Subject: [PATCH 056/566] __pattern_walk2_brick + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 16 +++++ include/oneapi/dpl/pstl/algorithm_impl.h | 58 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 12 ++++ 3 files changed, 86 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 591553e4f9b..d6743445c26 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -251,6 +251,11 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk2_brick(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Brick, /*parallel=*/::std::false_type) noexcept; +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1, _ForwardIterator2, + _Brick) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, @@ -258,6 +263,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, 
_RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Brick); + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, @@ -265,6 +276,11 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); +template +_ForwardIterator2 +__pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, + _ForwardIterator2, _Brick); + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> __pattern_walk2_brick_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick, diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 90806aa392f..097d1e5f3db 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -580,6 +580,16 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo return __brick(__first1, __last1, __first2, __is_vector); } +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Brick __brick) noexcept +{ + static_assert(__is_backend_tag_serial_v<_Tag>); + + return __brick(__first1, __last1, __first2, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, 
_RandomAccessIterator2>, @@ -600,6 +610,24 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1 }); } +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __brick(__i, __j, __first2 + (__i - __first1), _IsVector{}); + }); + return __first2 + (__last1 - __first1); + }); +} + //TODO: it postponed till adding more or less effective parallel implementation template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -629,6 +657,36 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo }); } +//TODO: it postponed till adding more or less effective parallel implementation +template +_ForwardIterator2 +__pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) +{ + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + + using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; + auto __begin = _iterator_tuple(__first1, __first2); + auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2()); + + typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; + typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; + + return __except_handler([&]() { + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + 
[__brick](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { + __brick(::std::get<0>(__val), + ::std::forward<_ReferenceType2>(::std::get<1>(__val))); + }); + + //TODO: parallel_for_each does not allow to return correct iterator value according to the ::std::transform + // implementation. Therefore, iterator value is calculated separately. + for (; __begin != __end; ++__begin) + ; + return ::std::get<1>(__begin.base()); + }); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index a430ee845cd..3aca94ad160 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -357,6 +357,18 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); } +template +_ForwardIterator2 +__pattern_walk2_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) +{ + return __pattern_walk2( + __tag, + __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __first2, __brick); +} + template struct __walk2_brick_n_wrapper { From 79c3f1215f234044c722878016772007e3d11d73 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 13:26:57 +0100 Subject: [PATCH 057/566] __pattern_walk2_brick + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 48 ++++++++++--------- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 16 ++++--- include/oneapi/dpl/pstl/glue_memory_impl.h | 26 ++++------ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 31 ++++++++---- 
.../dpl/pstl/hetero/numeric_impl_hetero.h | 15 +++--- 5 files changed, 76 insertions(+), 60 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 097d1e5f3db..874e2883f37 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3523,6 +3523,8 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ { typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3530,13 +3532,13 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ // {1} {}: parallel copying just first sequence if (__n2 == 0) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __copy_range, ::std::true_type()); + return __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result, __copy_range); // {} {2}: parallel copying justmake second sequence if (__n1 == 0) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, - __copy_range, ::std::true_type()); + return __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result, __copy_range); // testing whether the sequences are intersected _ForwardIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); @@ -3547,12 +3549,12 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ __par_backend::__parallel_invoke( ::std::forward<_ExecutionPolicy>(__exec), [=] { - 
__internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, - __result + __n1, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result + __n1, __copy_range); }); return __result + __n1 + __n2; } @@ -3566,12 +3568,12 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ __par_backend::__parallel_invoke( ::std::forward<_ExecutionPolicy>(__exec), [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, - __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __result + __n2, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result + __n2, __copy_range); }); return __result + __n1 + __n2; } @@ -3585,8 +3587,8 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first1; left_bound_seq_1) [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, - __left_bound_seq_1, __res_or, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __left_bound_seq_1, __res_or, __copy_range); }, [=, &__result] { __result = 
__internal::__parallel_set_op( @@ -3607,8 +3609,8 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first2; left_bound_seq_2) [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, - __left_bound_seq_2, __res_or, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __left_bound_seq_2, __res_or, __copy_range); }, [=, &__result] { __result = __internal::__parallel_set_op( @@ -3838,6 +3840,8 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>(); + const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3847,22 +3851,22 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir // {1} \ {}: parallel copying just first sequence if (__n2 == 0) - return __pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __internal::__brick_copy<_ExecutionPolicy>{}); // testing whether the sequences are intersected _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence if (__left_bound_seq_1 == __last1) - return __pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, 
__last1, __result, - __internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __internal::__brick_copy<_ExecutionPolicy>{}); // testing whether the sequences are intersected _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence if (__left_bound_seq_2 == __last2) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result, __brick_copy<_ExecutionPolicy>{}); if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 2b77e0c44b7..023264dd732 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -266,10 +266,11 @@ template copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result) { - return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, + oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); } template @@ -810,10 
+811,11 @@ move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __l { using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, - oneapi::dpl::__internal::__brick_move<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, + oneapi::dpl::__internal::__brick_move<_DecayedExecutionPolicy>{}); } // [partial.sort] diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 72e2f6cc1ad..d96cc8a62bc 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -45,19 +45,16 @@ uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + 
oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}); } else { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk2( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}); @@ -72,7 +69,7 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); @@ -101,19 +98,16 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + 
oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}); } else { - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_walk2( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}); @@ -128,7 +122,7 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); constexpr auto __is_parallel = oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 3aca94ad160..d47a621ccb5 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1320,19 +1320,21 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las if (__n == 0) return __d_first; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2, _Iterator3>(); + //To consider the direct copying pattern call in case just one of sequences is empty. 
if (__n1 == 0) oneapi::dpl::__internal::__pattern_walk2_brick( + __dispatch_tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - ::std::true_type()); + __first2, __last2, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); else if (__n2 == 0) oneapi::dpl::__internal::__pattern_walk2_brick( + __dispatch_tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - ::std::true_type()); + __first1, __last1, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); else { auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); @@ -1930,10 +1932,13 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, // {1} \ {}: the difference is {1} if (__first2 == __last2) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + return oneapi::dpl::__internal::__pattern_walk2_brick( + __dispatch_tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); } return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, @@ -1961,22 +1966,26 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw if (__first1 == __last1 && __first2 == __last2) return __result; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, 
_ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + //{1} is empty if (__first1 == __last1) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __dispatch_tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); } //{2} is empty if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __dispatch_tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); } typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; @@ -2037,22 +2046,26 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 if (__first1 == __last1 && __first2 == __last2) return __result; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + //{1} is empty if (__first1 == __last1) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __dispatch_tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); } //{2} is empty if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __dispatch_tag, 
oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); } typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 60c1001f5b8..90cf7764f4d 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -131,6 +131,8 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It if (__first == __last) return __result; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + const auto __n = __last - __first; auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); @@ -173,9 +175,9 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It .wait(); // Move data from temporary buffer into results - oneapi::dpl::__internal::__pattern_walk2_brick(::std::move(__policy), __first_tmp, __last_tmp, __result, - oneapi::dpl::__internal::__brick_move<_NewExecutionPolicy>{}, - ::std::true_type{}); + oneapi::dpl::__internal::__pattern_walk2_brick(__dispatch_tag, ::std::move(__policy), __first_tmp, __last_tmp, + __result, + oneapi::dpl::__internal::__brick_move<_NewExecutionPolicy>{}); //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer } @@ -233,6 +235,8 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir if (__n <= 0) return __d_first; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + using _It1ValueT = typename ::std::iterator_traits<_ForwardIterator1>::value_type; using _It2ValueTRef = typename ::std::iterator_traits<_ForwardIterator2>::reference; @@ -246,9 +250,8 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)); - __internal::__pattern_walk2_brick(__wrapped_policy, __first, __last, __d_first, - __internal::__brick_copy{}, - ::std::true_type{}); + __internal::__pattern_walk2_brick(__dispatch_tag, __wrapped_policy, __first, __last, __d_first, + __internal::__brick_copy{}); return __d_last; }); From 845409b26126e45b6da33116fe06711bf940f56c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 10:31:40 +0100 Subject: [PATCH 058/566] include/oneapi/dpl/pstl/algorithm_impl.h - fix compile error in struct __brick_move - implement operator()(_ReferenceType1&& __val, _ReferenceType2&& __result) const --- include/oneapi/dpl/pstl/algorithm_impl.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 874e2883f37..26da51f89f5 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1437,6 +1437,13 @@ struct __brick_move<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_ { return ::std::move(__first, __last, __result); } + + template + void + operator()(_ReferenceType1&& __val, _ReferenceType2&& __result) const + { + ::std::forward<_ReferenceType2>(__result) = ::std::move(::std::forward<_ReferenceType1>(__val)); + } }; template From 
25c5cdecdec92430e86f2cabfcf16c8c2174db38 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 17:53:41 +0100 Subject: [PATCH 059/566] __pattern_walk2_brick_n + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 13 ++++++++- include/oneapi/dpl/pstl/algorithm_impl.h | 29 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 12 ++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index d6743445c26..e03bc5f95bd 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -253,7 +253,7 @@ __pattern_walk2_brick(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, template _ForwardIterator2 -__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1, _ForwardIterator2, +__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Brick) noexcept; template @@ -286,11 +286,22 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_walk2_brick_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick, /*parallel=*/::std::false_type) noexcept; +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> __pattern_walk2_brick_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Brick, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, + _RandomAccessIterator2, _Brick); + //------------------------------------------------------------------------ // walk3 (pseudo) // diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h 
b/include/oneapi/dpl/pstl/algorithm_impl.h index 26da51f89f5..9d1deab8f41 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -705,6 +705,24 @@ __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __firs }); } +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Brick __brick) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __brick(__i, __j - __i, __first2 + (__i - __first1), _IsVector{}); + }); + return __first2 + __n; + }); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, @@ -715,6 +733,17 @@ __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ return __brick(__first1, __n, __first2, __is_vector); } +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, + _ForwardIterator2 __first2, _Brick __brick) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __brick(__first1, __n, __first2, typename _Tag::__is_vector{}); +} + //------------------------------------------------------------------------ // walk3 (pseudo) // diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index d47a621ccb5..d53d2518c3b 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ 
-387,6 +387,18 @@ __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); } +template +_ForwardIterator2 +__pattern_walk2_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _Size __n, _ForwardIterator2 __first2, _Brick __brick) +{ + return __pattern_walk2( + __tag, + __par_backend_hetero::make_wrapped_policy<__walk2_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), + __first1, __first1 + __n, __first2, __brick); +} + //------------------------------------------------------------------------ // transform_if //------------------------------------------------------------------------ From 7261791f695bb2a71fc7258566c44bbf42476545 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 2 Feb 2024 17:54:16 +0100 Subject: [PATCH 060/566] __pattern_walk2_brick_n + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 6 ++++-- include/oneapi/dpl/pstl/glue_memory_impl.h | 14 ++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 023264dd732..afd5e9483e7 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -279,10 +279,12 @@ copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _Forward { using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_walk2_brick_n( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + 
oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}); } template diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index d96cc8a62bc..cc46b189b0f 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -71,14 +71,11 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}); } else { @@ -124,14 +121,11 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - constexpr auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); - if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}); } else { 
From 53d7c309225c76e6c15e38ca8faf627f7de57fdf Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 11:06:55 +0100 Subject: [PATCH 061/566] __pattern_walk2_transform_if + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 6 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 11 +++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 17 +++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index e03bc5f95bd..2f8d2d953b8 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -373,6 +373,12 @@ __pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs _ForwardIterator2 __first2, _Function __func, _IsVector __is_vector, _IsParallel __is_parallel) noexcept; +template +_ForwardIterator2 +__pattern_walk2_transform_if(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Function) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 9d1deab8f41..64d814a7049 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -904,6 +904,17 @@ __pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs __is_parallel); } +template +_ForwardIterator2 +__pattern_walk2_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function __func) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __func); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> diff --git 
a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index d53d2518c3b..39b2f4c032a 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -429,6 +429,23 @@ __pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs __first1, __last1, __first2, __func); } +template +_ForwardIterator2 +__pattern_walk2_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __func) +{ + // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming + // values of the output sequence for elements where the predicate is false. + return __pattern_walk2<_BackendTag, /*_IsSync=*/::std::true_type, + __par_backend_hetero::access_mode::read, + __par_backend_hetero::access_mode::read_write>( + __tag, + __par_backend_hetero::make_wrapped_policy<__walk2_transform_if_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __first2, __func); +} + template struct __walk3_transform_if_wrapper { From e3bf163021fda673d818a000b2b9a2edd03d241a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 11:07:10 +0100 Subject: [PATCH 062/566] __pattern_walk2_transform_if + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index afd5e9483e7..c119a978671 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -355,12 +355,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 
__result, _UnaryOperation __op, _UnaryPredicate __pred) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_walk2_transform_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_if_unary_functor<_UnaryOperation, _UnaryPredicate>(::std::move(__op), - ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + ::std::move(__pred))); } template Date: Mon, 5 Feb 2024 11:35:33 +0100 Subject: [PATCH 063/566] __pattern_fill + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 8 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 26 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 12 +++++++++ 3 files changed, 46 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 2f8d2d953b8..02ae8f1bc92 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1050,11 +1050,19 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_fill(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&, /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_fill(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&, /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator 
+__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&); + template struct __brick_fill_n; diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 64d814a7049..edc2c5af00f 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3087,6 +3087,15 @@ __pattern_fill(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __ __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__first, __last, __is_vector); } +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__first, __last, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_fill(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -3103,6 +3112,23 @@ __pattern_fill(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random }); } +template +_RandomAccessIterator +__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, const _Tp& __value) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&__exec, __first, __last, &__value]() { + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value](_RandomAccessIterator __begin, _RandomAccessIterator __end) { + __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__begin, __end, + _IsVector{}); + }); + return __last; + }); +} + template struct __brick_fill_n<_Tp, _ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> diff --git 
a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 39b2f4c032a..665d2e472e3 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -499,6 +499,18 @@ __pattern_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIter return __last; } +template +_ForwardIterator +__pattern_fill(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, const _T& __value) +{ + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), + fill_functor<_T>{__value}); + return __last; +} + //------------------------------------------------------------------------ // generate //------------------------------------------------------------------------ From d32a2077c3a9c6ad4e898088bce4625cc3a4dac9 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 11:35:54 +0100 Subject: [PATCH 064/566] __pattern_fill + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 8 +++++--- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index edc2c5af00f..1b0d5b6d0d3 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3163,10 +3163,12 @@ __pattern_fill_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, con template _RandomAccessIterator __pattern_fill_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, const _Tp& __value, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) + /*is_parallel=*/::std::true_type, _IsVector /*__is_vector*/) { - return 
__internal::__pattern_fill(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __value, - ::std::true_type(), __is_vector); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + + return __internal::__pattern_fill(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __first + __count, __value); } //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index c119a978671..c7ad0f1db2a 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -438,10 +438,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - oneapi::dpl::__internal::__pattern_fill( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + oneapi::dpl::__internal::__pattern_fill(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __value); } template From b0afc8f17347ceb57e95628c84b23780428c12be Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 11:49:40 +0100 Subject: [PATCH 065/566] __pattern_fill_n + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 8 ++++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 02ae8f1bc92..070ef60d2dc 100644 --- 
a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1071,11 +1071,19 @@ _OutputIterator __pattern_fill_n(_ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&, /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +_OutputIterator +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&) noexcept; + template _RandomAccessIterator __pattern_fill_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&, /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator +__pattern_fill_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&); + //------------------------------------------------------------------------ // generate, generate_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 1b0d5b6d0d3..3c6d72b5a1b 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3160,6 +3160,15 @@ __pattern_fill_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, con return __internal::__brick_fill_n<_Tp, _ExecutionPolicy>{__value}(__first, __count, __is_vector); } +template +_OutputIterator +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_fill_n<_Tp, _ExecutionPolicy>{__value}(__first, __count, typename _Tag::__is_vector{}); +} + template _RandomAccessIterator __pattern_fill_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, const _Tp& __value, @@ -3171,6 +3180,15 @@ __pattern_fill_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __first + __count, __value); } +template +_RandomAccessIterator +__pattern_fill_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, 
_RandomAccessIterator __first, + _Size __count, const _Tp& __value) +{ + return __internal::__pattern_fill(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, + __value); +} + //------------------------------------------------------------------------ // generate, generate_n //------------------------------------------------------------------------ From d87c2f2b47f4258da9c0d247752a7b226f306c27 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 11:51:23 +0100 Subject: [PATCH 066/566] __pattern_fill_n + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index c7ad0f1db2a..128d0c1b35b 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -451,10 +451,10 @@ fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const if (__count <= 0) return __first; - return oneapi::dpl::__internal::__pattern_fill_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_fill_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __count, __value); } // [alg.generate] From 26b6dcd4a4e2914a0cfc03bd96765941c7365112 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 12:01:29 +0100 Subject: [PATCH 067/566] __pattern_copy_if + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 +++++ include/oneapi/dpl/pstl/algorithm_impl.h | 45 +++++++++++++++++++ 
.../dpl/pstl/hetero/algorithm_impl_hetero.h | 25 +++++++++++ 3 files changed, 81 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 070ef60d2dc..e81b38d31e1 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -632,12 +632,23 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryPredicate, _IsVector, /*parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> __pattern_copy_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryPredicate, _IsVector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _UnaryPredicate); + //------------------------------------------------------------------------ // count //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 3c6d72b5a1b..3870be47fdd 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1667,6 +1667,16 @@ __pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); } +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + _UnaryPredicate __pred) noexcept +{ + 
static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_copy_if(__first, __last, __result, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> @@ -1703,6 +1713,41 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ra return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); } +template +_RandomAccessIterator2 +__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + const _DifferenceType __n = __last - __first; + if (_DifferenceType(1) < __n) + { + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); + return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() { + bool* __mask = __mask_buf.get(); + _DifferenceType __m{}; + __par_backend::__parallel_strict_scan( + ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), + __mask + __i, __pred, _IsVector{}) + .first; + }, + ::std::plus<_DifferenceType>(), // Combine + [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan + __internal::__brick_copy_by_mask( + __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{}); + }, + [&__m](_DifferenceType __total) { __m = __total; }); + return __result + __m; + }); + } + // trivial sequence - use serial algorithm + return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{}); +} + 
//------------------------------------------------------------------------ // count //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 665d2e472e3..d1cfa28d933 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1138,6 +1138,31 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __la return __result_first + __num_copied; } +template +_Iterator2 +__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __result_first, _Predicate __pred) +{ + using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; + + if (__first == __last) + return __result_first; + + _It1DifferenceType __n = __last - __first; + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __buf2 = __keep2(__result_first, __result_first + __n); + + auto __res = __par_backend_hetero::__parallel_copy_if(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), + __buf2.all_view(), __n, __pred); + + ::std::size_t __num_copied = __res.get(); + return __result_first + __num_copied; +} + //------------------------------------------------------------------------ // partition_copy //------------------------------------------------------------------------ From 46ba9c41ff49e937186c00aa7a273bc026fc2b8d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 12:01:42 +0100 Subject: [PATCH 068/566] __pattern_copy_if + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 
include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 128d0c1b35b..c1c507d029a 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -292,10 +292,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_copy_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_copy_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, __pred); } // [alg.swap] diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index d1cfa28d933..6c1202a5c11 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1234,10 +1234,12 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __la using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto 
__copy_last = __pattern_copy_if(__exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + auto __copy_last = + __pattern_copy_if(__dispatch_tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer return __pattern_walk2( From cc9148d38965f7b79faa99dd198c394e01821009 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 16:11:08 +0100 Subject: [PATCH 069/566] include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h - fix compiler error: implement previously absent impl of __pattern_fill_n for hetero policy --- .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 6c1202a5c11..2a1c0a0ca4e 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -511,6 +511,20 @@ __pattern_fill(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Forw return __last; } +template +_ForwardIterator +__pattern_fill_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, + const _T& __value) +{ + // TODO: is this new implementation correct? 
+ // Previously there was no hetero implementation of __pattern_fill_n + + return __pattern_walk1_n(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), + __count, fill_functor<_T>{__value}); +} + + //------------------------------------------------------------------------ // generate //------------------------------------------------------------------------ From 4553fff012e9391404f0469b3e901fa6a4995269 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 16:33:55 +0100 Subject: [PATCH 070/566] __pattern_adjacent_find + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 54 ++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 57 +++++++++++++++++++ 3 files changed, 121 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index e81b38d31e1..b7202b5cf28 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1030,11 +1030,21 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, /* is_parallel */ ::std::false_type, _IsVector, _Semantic) noexcept; +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, + _Semantic) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_adjacent_find(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, /* is_parallel */ ::std::true_type, _IsVector, _Semantic); +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate, _Semantic); + 
//------------------------------------------------------------------------ // nth_element //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 3870be47fdd..e6b68c55df4 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2997,6 +2997,16 @@ __pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIt return __internal::__brick_adjacent_find(__first, __last, __pred, __is_vector, _Semantic::value); } +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred, _Semantic) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_adjacent_find(__first, __last, __pred, typename _Tag::__is_vector{}, _Semantic::value); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -3042,6 +3052,50 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first }); } +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred, _Semantic __or_semantic) +{ + if (__last - __first < 2) + return __last; + + return __internal::__except_handler([&]() { + return __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, + [__last, __pred, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, + _RandomAccessIterator __value) -> _RandomAccessIterator { + // TODO: investigate performance benefits from the use of shared variable for the result, + // checking (compare_and_swap idiom) its 
__value at __first. + if (__or_semantic && __value < __last) + { //found + return __value; + } + + if (__value > __begin) + { + // modify __end to check the predicate on the boundary __values; + // TODO: to use a custom range with boundaries overlapping + // TODO: investigate what if we remove "if" below and run algorithm on range [__first, __last-1) + // then check the pair [__last-1, __last) + if (__end != __last) + ++__end; + + //correct the global result iterator if the "brick" returns a local "__last" + const _RandomAccessIterator __res = + __internal::__brick_adjacent_find(__begin, __end, __pred, _IsVector{}, __or_semantic); + if (__res < __end) + __value = __res; + } + return __value; + }, + [](_RandomAccessIterator __x, _RandomAccessIterator __y) -> _RandomAccessIterator { + return __x < __y ? __x : __y; + } //reduce a __value + ); + }); +} + //------------------------------------------------------------------------ // nth_element //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 2a1c0a0ca4e..8fc80e800e7 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -782,6 +782,34 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator return result ? 
__first : __last; } +template +_Iterator +__pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __predicate, oneapi::dpl::__internal::__or_semantic) +{ + if (__last - __first < 2) + return __last; + + using _Predicate = + oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf1 = __keep1(__first, __last - 1); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf2 = __keep2(__first + 1, __last); + + // TODO: in case of conflicting names + // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() + bool result = __par_backend_hetero::__parallel_find_or( + ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, + __par_backend_hetero::__parallel_or_tag{}, + oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); + + // inverted conditional because of + // reorder_predicate in glue_algorithm_impl.h + return result ? __first : __last; +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __predicate, @@ -811,6 +839,35 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator return (__result_iterator == __last - 1) ? 
__last : __result_iterator; } +template +_Iterator +__pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __predicate, + oneapi::dpl::__internal::__first_semantic) +{ + if (__last - __first < 2) + return __last; + + using _Predicate = + oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; + + auto __result = __par_backend_hetero::__parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::zip( + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)), + __par_backend_hetero::zip( + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last - 1), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last)), + _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, ::std::true_type{}); + + auto __zip_at_first = __par_backend_hetero::zip( + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)); + _Iterator __result_iterator = __first + (__result - __zip_at_first); + return (__result_iterator == __last - 1) ? 
__last : __result_iterator; +} + //------------------------------------------------------------------------ // count, count_if //------------------------------------------------------------------------ From 749d90176f4743bcdaf510222acb51d6c77aefd9 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 16:35:07 +0100 Subject: [PATCH 071/566] __pattern_adjacent_find + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index c1c507d029a..070c30b36b0 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -171,21 +171,23 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) { typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, ::std::equal_to<_ValueType>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__first_semantic()); + + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, ::std::equal_to<_ValueType>(), + oneapi::dpl::__internal::__first_semantic()); } template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { + 
constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_adjacent_find( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), oneapi::dpl::__internal::__first_semantic()); } @@ -871,12 +873,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + const _ForwardIterator __res = oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__first_semantic()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__first_semantic()); return __res == __last ? 
__last : oneapi::dpl::__internal::__pstl_next(__res); } @@ -892,12 +893,12 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__or_semantic()) == __last; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), + oneapi::dpl::__internal::__or_semantic()) == __last; } template From 27e59d50ece9a69bf527a6c264d86f9024d3a5c4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 16:48:32 +0100 Subject: [PATCH 072/566] __pattern_any_of + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 8 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 22 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 19 ++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index b7202b5cf28..f39eee7ef32 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -50,11 +50,19 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_any_of(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred, _IsVector, /*parallel=*/::std::false_type) noexcept; +template +bool +__pattern_any_of(_Tag, 
_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_any_of(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred, _IsVector, /*parallel=*/::std::true_type); +template +bool +__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred); + //------------------------------------------------------------------------ // walk1 (pseudo) // diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index e6b68c55df4..61d0995a4bb 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -71,6 +71,15 @@ __pattern_any_of(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator return __internal::__brick_any_of(__first, __last, __pred, __is_vector); } +template +bool +__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_any_of(__first, __last, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Pred __pred, @@ -84,6 +93,19 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand }); } +template +bool +__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Pred __pred) +{ + return __internal::__except_handler([&]() { + return __internal::__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__brick_any_of(__i, __j, __pred, _IsVector{}); + }); + }); +} + // 
[alg.foreach] // for_each_n with no policy diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 8fc80e800e7..f1288fdb078 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -924,6 +924,25 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate{__pred}, __par_backend_hetero::__parallel_or_tag{}, __buf.all_view()); } +template +bool +__pattern_any_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Pred __pred) +{ + if (__first == __last) + return false; + + using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf = __keep(__first, __last); + + return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), + _Predicate{__pred}, __par_backend_hetero::__parallel_or_tag{}, __buf.all_view()); +} + //------------------------------------------------------------------------ // equal //------------------------------------------------------------------------ From 3fd9c6577b9f14911222caa4040ba43c6a1dfc48 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 16:49:24 +0100 Subject: [PATCH 073/566] __pattern_any_of + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 070c30b36b0..6925816b17b 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ 
b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -43,10 +43,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_any_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } // [alg.all_of] diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index f1288fdb078..c2353b9ceb1 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1132,9 +1132,10 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __las if (__last - __first == __count) { - return (!__internal::__pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred}, - ::std::true_type{}, ::std::true_type{})) + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + + return (!__internal::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred})) ? 
__first : __last; } @@ -1606,10 +1607,13 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterat if (__last == __first) return __last; else if (__last - __first < 2) - return __pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, ::std::true_type(), - ::std::true_type()) + { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + + return __pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred) ? __last : __first; + } using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; From 74735909dfde368d860dfd693a077be1e364e6bf Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 16:56:20 +0100 Subject: [PATCH 074/566] __pattern_count + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 30 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 28 +++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index f39eee7ef32..35882ecc71d 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -677,12 +677,21 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy< __pattern_count(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate, /* is_parallel */ ::std::false_type, _IsVector) noexcept; +template +typename ::std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> __pattern_count(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Predicate, /* is_parallel */ ::std::true_type, _IsVector); 
+template +typename ::std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Predicate); + //------------------------------------------------------------------------ // unique //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 61d0995a4bb..b2d66091175 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1798,6 +1798,15 @@ __pattern_count(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator _ return __internal::__brick_count(__first, __last, __pred, __is_vector); } +template +typename ::std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_count(__first, __last, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> @@ -1820,6 +1829,27 @@ __pattern_count(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando }); } +template +typename ::std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Predicate __pred) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; + + //trivial pre-checks + if (__first == __last) + return _SizeType(0); + + return __internal::__except_handler([&]() { + return __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), + [__pred](_RandomAccessIterator 
__begin, _RandomAccessIterator __end, _SizeType __value) -> _SizeType { + return __value + __internal::__brick_count(__begin, __end, __pred, _IsVector{}); + }, + ::std::plus<_SizeType>()); + }); +} + //------------------------------------------------------------------------ // unique //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index c2353b9ceb1..a414fffbb35 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -901,6 +901,34 @@ __pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, .get(); } +template +typename ::std::iterator_traits<_Iterator>::difference_type +__pattern_count(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __predicate) +{ + if (__first == __last) + return 0; + + using _ReduceValueType = typename ::std::iterator_traits<_Iterator>::difference_type; + + auto __reduce_fn = ::std::plus<_ReduceValueType>{}; + // int is being implicitly casted to difference_type + // otherwise we can only pass the difference_type as a functor template parameter + auto __transform_fn = [__predicate](auto __gidx, auto __acc) -> int { + return (__predicate(__acc[__gidx]) ? 
1 : 0); + }; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf = __keep(__first, __last); + + return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, + ::std::true_type /*is_commutative*/>( + ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + unseq_backend::__no_init_value{}, // no initial value + __buf.all_view()) + .get(); +} + //------------------------------------------------------------------------ // any_of //------------------------------------------------------------------------ From 9807a462ca805acf16e0859d03d3e0b47f8ad1b8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 16:56:42 +0100 Subject: [PATCH 075/566] __pattern_count + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 6925816b17b..7a3582ac073 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -201,12 +201,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_count( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__equal_value>( - __value), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + 
__value)); } template @@ -214,10 +214,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_count( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_count(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } // [alg.search] From 5ae36d448a48cbf2eb4bc0878c519e4476593550 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 17:18:05 +0100 Subject: [PATCH 076/566] __pattern_equal + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 22 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 57 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 33 +++++++++++ 3 files changed, 112 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 35882ecc71d..73bb1bc2f79 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -412,12 +412,23 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; + template 
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); + template bool __brick_equal(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate, /* is_vector = */ ::std::false_type) noexcept; @@ -432,12 +443,23 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _BinaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _BinaryPredicate); + //------------------------------------------------------------------------ // find_if //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index b2d66091175..1d9003e41bb 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -982,6 +982,17 @@ __pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 
__first1, _ForwardIterator return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, __is_vector); } +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _BinaryPredicate __p) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> @@ -1002,6 +1013,27 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran }); } +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _BinaryPredicate __p) +{ + if (__last1 - __first1 != __last2 - __first2) + return false; + + return __internal::__except_handler([&]() { + return !__internal::__parallel_or( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), + __p, _IsVector{}); + }); + }); +} + + //------------------------------------------------------------------------ // equal version for sequences with equal length //------------------------------------------------------------------------ @@ -1032,6 +1064,16 @@ __pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator return __internal::__brick_equal(__first1, __last1, __first2, __p, __is_vector); } +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _BinaryPredicate __p) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return 
__internal::__brick_equal(__first1, __last1, __first2, __p, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> @@ -1048,6 +1090,21 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran }); } +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _BinaryPredicate __p) +{ + return __internal::__except_handler([&]() { + return !__internal::__parallel_or( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, _IsVector{}); + }); + }); +} + //------------------------------------------------------------------------ // find_if //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index a414fffbb35..2f9ded980ae 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -999,6 +999,30 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); } +template +bool +__pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Pred __pred) +{ + if (__last1 == __first1 || __last2 == __first2 || __last1 - __first1 != __last2 - __first2) + return false; + + using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, equal_predicate<_Pred>>; + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, 
_Iterator1>(); + auto __buf1 = __keep1(__first1, __last1); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __buf2 = __keep2(__first2, __last2); + + // TODO: in case of conflicting names + // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() + return !__par_backend_hetero::__parallel_find_or( + ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, + __par_backend_hetero::__parallel_or_tag{}, + oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); +} + + //------------------------------------------------------------------------ // equal version for sequences with equal length //------------------------------------------------------------------------ @@ -1013,6 +1037,15 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); } +template +bool +__pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Pred __pred) +{ + return oneapi::dpl::__internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __first2, __first2 + (__last1 - __first1), __pred); +} + //------------------------------------------------------------------------ // find_if //------------------------------------------------------------------------ From 0508fc612612ce007b9b83d7aabba8c7bc8854c6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 17:24:16 +0100 Subject: [PATCH 077/566] __pattern_equal + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 18 ++++++++++---- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 20 +++++++++------- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 24 ++++++++++++++----- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h 
b/include/oneapi/dpl/pstl/algorithm_impl.h index 1d9003e41bb..ad7cfd9b399 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1291,8 +1291,13 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _R { if (__last - __first == __s_last - __s_first) { - const bool __res = __internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __s_first, __pred, __is_vector, ::std::true_type()); + // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, + _RandomAccessIterator2>(); + + const bool __res = __internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __pred); return __res ? __first : __last; } else @@ -1395,8 +1400,13 @@ __pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ran { if (__last - __first == __s_last - __s_first) { - const bool __res = __internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __s_first, __pred, __is_vector, ::std::true_type()); + // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, + _RandomAccessIterator2>(); + + const bool __res = __internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __pred); return __res ? 
__first : __last; } else diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 7a3582ac073..5af0b880aee 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -776,10 +776,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__pattern_equal( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>()); + // TODO is it correct that we check _ForwardIterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __p); } template @@ -795,10 +797,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__pattern_equal( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>()); + // TODO is it correct that we check _ForwardIterator2 in __select_backend ? 
+ constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __last2, __p); } template diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 2f9ded980ae..3fee7daff48 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1032,9 +1032,12 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, b __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { - return oneapi::dpl::__internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __first2, __first2 + (__last1 - __first1), __pred, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + // TODO is it correct that we check _Iterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + + return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __first2 + (__last1 - __first1), __pred); } template @@ -1081,8 +1084,12 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l if (__last - __first == __s_last - __s_first) { - const bool __res = __pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred, - ::std::true_type(), ::std::true_type()); + // TODO is it correct that we check _Iterator2 in __select_backend ? 
+ constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + + const bool __res = __pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); return __res ? __first : __last; } else @@ -1146,9 +1153,14 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las if (__last - __first == __s_last - __s_first) { + // TODO is it correct that we check _Iterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + const bool __res = __pattern_equal( + __dispatch_tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __first, - __last, __s_first, __pred, ::std::true_type(), ::std::true_type()); + __last, __s_first, __pred); return __res ? __first : __last; } From a550ebf6d6a42bf2e3e50f95b9beb84b63e32bb1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 17:36:04 +0100 Subject: [PATCH 078/566] __pattern_find_end + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 +++++ include/oneapi/dpl/pstl/algorithm_impl.h | 42 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 28 +++++++++++++ 3 files changed, 81 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 73bb1bc2f79..1631a73aada 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -501,6 +501,11 @@ __pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _Fo _BinaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; + template 
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> @@ -508,6 +513,12 @@ __pattern_find_end(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIter _RandomAccessIterator2, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); + //------------------------------------------------------------------------ // find_first_of //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index ad7cfd9b399..ad91b847c06 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1282,6 +1282,17 @@ __pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterat return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, __is_vector); } +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); +} + + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> @@ -1315,6 +1326,37 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _R } } +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, + _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) +{ + 
using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + if (__last - __first == __s_last - __s_first) + { + const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __pred); + return __res ? __first : __last; + } + else + { + return __internal::__except_handler([&]() { + return __internal::__parallel_find( + __backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, + _IsVector{}); + }, + ::std::false_type{}); + }); + } +} + + //------------------------------------------------------------------------ // find_first_of //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 3fee7daff48..15ef508753d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1106,6 +1106,34 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l } } +template +_Iterator1 +__pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) +{ + if (__first == __last || __s_last == __s_first || __last - __first < __s_last - __s_first) + return __last; + + if (__last - __first == __s_last - __s_first) + { + const bool __res = + __pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred); + return __res ? 
__first : __last; + } + else + { + using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; + + return __par_backend_hetero::__parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_last), _Predicate{__pred}, + ::std::false_type{}); + } +} + //------------------------------------------------------------------------ // find_first_of //------------------------------------------------------------------------ From a3742b16c102b4c5690651f38e453df51629223d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 17:36:24 +0100 Subject: [PATCH 079/566] __pattern_find_end + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 5af0b880aee..3197a4901b4 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -129,10 +129,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_find_end( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + 
constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_find_end(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __s_last, __pred); } template From 5d8ec601e5be7d45a37bccda4f3bac83468b0af6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 17:52:28 +0100 Subject: [PATCH 080/566] __pattern_find_first_of + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 32 ++++++++++++++++++- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 22 +++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 1631a73aada..008a150d442 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -539,12 +539,23 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _BinaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> __pattern_find_first_of(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +_ForwardIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator2, _BinaryPredicate); + 
//------------------------------------------------------------------------ // search //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index ad91b847c06..bad6eb6df66 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1378,7 +1378,7 @@ __brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _Forw template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> +_ForwardIterator1 __pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept @@ -1386,6 +1386,17 @@ __pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardI return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, __is_vector); } +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> @@ -1403,6 +1414,25 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _F }); } +template +_ForwardIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first, + _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, + _BinaryPredicate __pred) +{ + using __backend_tag = typename 
__parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&]() { + return __internal::__parallel_find( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__s_first, __s_last, &__pred](_ForwardIterator1 __i, _ForwardIterator1 __j) { + return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, _IsVector{}); + }, + ::std::true_type{}); + }); +} + //------------------------------------------------------------------------ // search //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 15ef508753d..0b2b439eca2 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1159,6 +1159,28 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator ::std::true_type{}); } +template +_Iterator1 +__pattern_find_first_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) +{ + if (__first == __last || __s_last == __s_first) + return __last; + + using _Predicate = unseq_backend::first_match_pred<_ExecutionPolicy, _Pred>; + + // TODO: To check whether it makes sense to iterate over the second sequence in case of + // distance(__first, __last) < distance(__s_first, __s_last). 
+ return __par_backend_hetero::__parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_last), _Predicate{__pred}, + ::std::true_type{}); +} + + //------------------------------------------------------------------------ // search //------------------------------------------------------------------------ From 5a52a0b8d82e6e0809626de4b802b6e4266392e2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 17:52:42 +0100 Subject: [PATCH 081/566] __pattern_find_first_of + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 3197a4901b4..576fb10630d 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -151,10 +151,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_find_first_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, 
_ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_find_first_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __s_last, __pred); } template From e95371ad1ae9d6cf88c24f4b4a64d6fb7b388dc7 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:06:03 +0100 Subject: [PATCH 082/566] __pattern_generate + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 25 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 12 +++++++++ 3 files changed, 46 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 008a150d442..935c26da900 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1183,11 +1183,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_generate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator, /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_generate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Generator, /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Generator); + template _RandomAccessIterator __brick_generate_n(_RandomAccessIterator, Size, _Generator, /* is_vector = */ ::std::true_type) noexcept; diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index bad6eb6df66..fbc3cb75126 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3506,6 +3506,15 @@ 
__pattern_generate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterato __internal::__brick_generate(__first, __last, __g, __is_vector); } +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + __internal::__brick_generate(__first, __last, __g, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_generate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -3521,6 +3530,22 @@ __pattern_generate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra }); } +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Generator __g) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&]() { + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__g](_RandomAccessIterator __begin, _RandomAccessIterator __end) { + __internal::__brick_generate(__begin, __end, __g, _IsVector{}); + }); + return __last; + }); +} + template _RandomAccessIterator __brick_generate_n(_RandomAccessIterator __first, Size __count, _Generator __g, diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 0b2b439eca2..6995bcb492d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -556,6 +556,18 @@ __pattern_generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward return __last; } +template +_ForwardIterator +__pattern_generate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + 
_ForwardIterator __last, _Generator __g) +{ + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), + generate_functor<_Generator>{__g}); + return __last; +} + //------------------------------------------------------------------------ // brick_copy, brick_move //------------------------------------------------------------------------ From 183ea6192c29daa4a236f532c66346d30ae13c95 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:06:40 +0100 Subject: [PATCH 083/566] __pattern_generate + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 576fb10630d..1fd1ff3232e 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -467,10 +467,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) { - oneapi::dpl::__internal::__pattern_generate( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + oneapi::dpl::__internal::__pattern_generate(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __g); } template From 233854b58931309022bbdb907799f45900f9d663 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:17:08 +0100 Subject: [PATCH 
084/566] __pattern_generate_n + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 8 ++++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 935c26da900..f5f7d58dc72 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1210,11 +1210,19 @@ OutputIterator __pattern_generate_n(_ExecutionPolicy&&, OutputIterator, Size, _Generator, /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +_OutputIterator +__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, _Generator) noexcept; + template _RandomAccessIterator __pattern_generate_n(_ExecutionPolicy&&, _RandomAccessIterator, Size, _Generator, /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator +__pattern_generate_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Generator); + //------------------------------------------------------------------------ // remove //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index fbc3cb75126..8840e63943b 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3569,6 +3569,15 @@ __pattern_generate_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, return __internal::__brick_generate_n(__first, __count, __g, __is_vector); } +template +_OutputIterator +__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_generate_n(__first, __count, __g, typename _Tag::__is_vector{}); +} + template _RandomAccessIterator __pattern_generate_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size 
__count, _Generator __g, @@ -3580,6 +3589,17 @@ __pattern_generate_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _ ::std::true_type(), __is_vector); } +template +_RandomAccessIterator +__pattern_generate_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __count, _Generator __g) +{ + static_assert(__is_random_access_iterator_v<_RandomAccessIterator>, + "Pattern-brick error. Should be a random access iterator."); + return __internal::__pattern_generate(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, + __g); +} + //------------------------------------------------------------------------ // remove //------------------------------------------------------------------------ From c8a61eef2d008df8a06108c79e4f05b803611922 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:17:19 +0100 Subject: [PATCH 085/566] __pattern_generate_n + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 1fd1ff3232e..d1580305c5a 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -480,10 +480,10 @@ generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _ if (__count <= 0) return __first; - return oneapi::dpl::__internal::__pattern_generate_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_generate_n(__dispatch_tag, 
::std::forward<_ExecutionPolicy>(__exec), + __first, __count, __g); } // [alg.remove] From cdc46496cc051cce0eabd78a02c8e504fb33f624 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:27:40 +0100 Subject: [PATCH 086/566] include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h - fix compiler error: implement previously absent impl of __pattern_generate_n for hetero policy --- .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 6995bcb492d..04e920be109 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -568,6 +568,19 @@ __pattern_generate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ return __last; } +template +_ForwardIterator +__pattern_generate_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _Size __count, _Generator __g) +{ + // TODO: is this new implementation correct? 
+ // Previously we hadn't hetero impl for __pattern_generate_n + + return __pattern_walk1_n(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), + __count, generate_functor<_Generator>{__g}); +} + //------------------------------------------------------------------------ // brick_copy, brick_move //------------------------------------------------------------------------ From 4f3b82ed3cbfc0c3138e056312716c7e2a59569c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:40:40 +0100 Subject: [PATCH 087/566] __pattern_includes + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 +++ include/oneapi/dpl/pstl/algorithm_impl.h | 71 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 28 ++++++++ 3 files changed, 110 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index f5f7d58dc72..202c503a287 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1305,6 +1305,11 @@ __pattern_includes(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _Fo _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> @@ -1312,6 +1317,12 @@ __pattern_includes(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIter _RandomAccessIterator2, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +bool +__pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Compare); + //------------------------------------------------------------------------ // set_union 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 8840e63943b..299e2b1b78f 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3787,6 +3787,16 @@ __pattern_includes(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardItera return ::std::includes(__first1, __last1, __first2, __last2, __comp); } +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return ::std::includes(__first1, __last1, __first2, __last2, __comp); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> @@ -3849,6 +3859,67 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ }); } +template +bool +__pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Compare __comp) +{ + if (__first2 == __last2) + return true; + + //optimization; {1} - the first sequence, {2} - the second sequence + //{1} is empty or size_of{2} > size_of{1} + if (__first1 == __last1 || __last2 - __first2 > __last1 - __first1 || + // {1}: [**********] or [**********] + // {2}: [***********] [***********] + __comp(*__first2, *__first1) || __comp(*(__last1 - 1), *(__last2 - 1))) + return false; + + __first1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + if (__first1 == __last1) + return false; + + if (__last2 - __first2 == 1) + return !__comp(*__first1, *__first2) && !__comp(*__first2, *__first1); + + return __internal::__except_handler([&]() { + return !__internal::__parallel_or( + ::std::forward<_ExecutionPolicy>(__exec), 
__first2, __last2, + [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { + assert(__j > __i); + //assert(__j - __i > 1); + + //1. moving boundaries to "consume" subsequence of equal elements + auto __is_equal_sorted = [&__comp](_RandomAccessIterator2 __a, _RandomAccessIterator2 __b) -> bool { + //enough one call of __comp due to compared couple belongs to one sorted sequence + return !__comp(*__a, *__b); + }; + + //1.1 left bound, case "aaa[aaaxyz...]" - searching "x" + if (__i > __first2 && __is_equal_sorted(__i - 1, __i)) + { + //whole subrange continues to have equal elements - return "no op" + if (__is_equal_sorted(__i, __j - 1)) + return false; + + __i = ::std::upper_bound(__i, __last2, *__i, __comp); + } + + //1.2 right bound, case "[...aaa]aaaxyz" - searching "x" + if (__j < __last2 && __is_equal_sorted(__j - 1, __j)) + __j = ::std::upper_bound(__j, __last2, *__j, __comp); + + //2. testing is __a subsequence of the second range included into the first range + auto __b = ::std::lower_bound(__first1, __last1, *__i, __comp); + + assert(!__comp(*(__last1 - 1), *__b)); + assert(!__comp(*(__j - 1), *__i)); + return !::std::includes(__b, __last1, __i, __j, __comp); + }); + }); +} + inline constexpr auto __set_algo_cut_off = 1000; template +bool +__pattern_includes(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) +{ + //according to the spec + if (__first2 == __last2) + return true; + + //optimization; {1} - the first sequence, {2} - the second sequence + //{1} is empty or size_of{2} > size_of{1} + if (__first1 == __last1 || __last2 - __first2 > __last1 - __first1) + return false; + + typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _Size1; + typedef typename ::std::iterator_traits<_ForwardIterator2>::difference_type _Size2; + + using 
__brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; + return !__par_backend_hetero::__parallel_or( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last1), + __brick_include_type(__comp, __last1 - __first1, __last2 - __first2)); +} + //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ From 5e7336277f071480af0d111af6d7f6172c666fba Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:40:59 +0100 Subject: [PATCH 088/566] __pattern_includes + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index d1580305c5a..e5015e47064 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -964,10 +964,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_includes( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + 
constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_includes(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __comp); } template From 81b5469db7fbdacec7444e1b721c6765503f9d6a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:56:09 +0100 Subject: [PATCH 089/566] __pattern_inplace_merge + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 54 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 30 +++++++++++ 3 files changed, 94 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 202c503a287..2132e715aa7 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1289,12 +1289,22 @@ __pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator, _Bidirection _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _BidirectionalIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_inplace_merge(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); + //------------------------------------------------------------------------ // includes //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 299e2b1b78f..af5ca222661 
100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3732,6 +3732,16 @@ __pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator __first, _Bid __internal::__brick_inplace_merge(__first, __middle, __last, __comp, __is_vector); } +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, + _BidirectionalIterator __last, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + __internal::__brick_inplace_merge(__first, __middle, __last, __comp, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, @@ -3774,6 +3784,50 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first }); } +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + if (__first == __last || __first == __middle || __middle == __last) + { + return; + } + + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; + auto __n = __last - __first; + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n); + _Tp* __r = __buf.get(); + __internal::__except_handler([&]() { + auto __move_values = [](_RandomAccessIterator __x, _Tp* __z) { + if constexpr (::std::is_trivial_v<_Tp>) + *__z = ::std::move(*__x); + else + ::new (::std::addressof(*__z)) _Tp(::std::move(*__x)); + }; + + auto __move_sequences = [](_RandomAccessIterator __first1, _RandomAccessIterator __last1, _Tp* __first2) { + return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector{}); + }; + + 
__par_backend::__parallel_merge( + ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, + [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, + _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, + _Compare __comp) { + (__utils::__serial_move_merge(__n))(__f1, __l1, __f2, __l2, __f3, __comp, __move_values, __move_values, + __move_sequences, __move_sequences); + return __f3 + (__l1 - __f1) + (__l2 - __f2); + }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, + [__r, __first](_Tp* __i, _Tp* __j) { + __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), _IsVector{}); + }); + }); +} + //------------------------------------------------------------------------ // includes //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 6a135eb5429..f6eb9fd44b5 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1691,6 +1691,36 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __copy_first, __copy_last, __first, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); } +template +void +__pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __middle, _Iterator __last, _Compare __comp) +{ + using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; + + if (__first == __middle || __middle == __last || __first == __last) + return; + + assert(__first < __middle && __middle < __last); + + auto __n = __last - __first; + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __n); + auto __copy_first = __buf.get(); + auto 
__copy_last = __copy_first + __n; + + __pattern_merge(__exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), + __comp, ::std::true_type{}, ::std::true_type{}); + + //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer + __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_move<_ExecutionPolicy>{}); +} + //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ From b8e043fe5145420e90fe3fffbb35ade9cc55db3f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 5 Feb 2024 18:56:22 +0100 Subject: [PATCH 090/566] __pattern_inplace_merge + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index e5015e47064..413ce31fe00 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -942,10 +942,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_inplace_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator>(); + + oneapi::dpl::__internal::__pattern_inplace_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last, __comp); } template From 526ae3010aca144f8a204ca736492825381a1676 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 09:41:14 +0100 Subject: [PATCH 091/566] __pattern_is_heap + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 ++++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 23 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 17 ++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 2132e715aa7..bde93113e9b 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1481,11 +1481,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_is_heap(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + //------------------------------------------------------------------------ // min_element 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index af5ca222661..973c2d7353b 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4615,6 +4615,15 @@ __pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAcce return __internal::__brick_is_heap(__first, __last, __comp, __is_vector); } +template +bool +__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_is_heap(__first, __last, __comp, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -4629,6 +4638,20 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran }); } +template +bool +__pattern_is_heap(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + return __internal::__except_handler([&]() { + return !__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, + _IsVector{}); + }); + }); +} + //------------------------------------------------------------------------ // min_element //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index f6eb9fd44b5..23c88d4b769 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ 
b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1612,6 +1612,23 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); } +template +bool +__pattern_is_heap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + if (__last - __first < 2) + return true; + + using _Predicate = + oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; + + return !__par_backend_hetero::__parallel_or( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); +} + //------------------------------------------------------------------------ // merge //------------------------------------------------------------------------ From f61dbc9c69a4815260dc0361af69ce772784317f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 09:41:25 +0100 Subject: [PATCH 092/566] __pattern_is_heap + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 413ce31fe00..f4a6cf48516 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1097,10 +1097,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_is_heap( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + + return oneapi::dpl::__internal::__pattern_is_heap(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __comp); } template From 42236624b6084b9012c8a6d26b29f0fd35ab8d28 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:02:36 +0100 Subject: [PATCH 093/566] __pattern_is_heap_until + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 26 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 18 +++++++++++++ 3 files changed, 53 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index bde93113e9b..a2d586f4397 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1459,11 +1459,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + 
//------------------------------------------------------------------------ // is_heap //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 973c2d7353b..742524bbc3f 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4529,6 +4529,16 @@ __pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator __first, _Rand return __internal::__brick_is_heap_until(__first, __last, __comp, __is_vector); } +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_is_heap_until(__first, __last, __comp, typename _Tag::__is_vector{}); +} + template _RandomAccessIterator __is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp, @@ -4566,6 +4576,22 @@ __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first }); } +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + return __internal::__except_handler([&]() { + return __parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, _IsVector{}); + }, + ::std::true_type{}); + }); +} + + //------------------------------------------------------------------------ // is_heap //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h 
index 23c88d4b769..115b645e628 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1595,6 +1595,24 @@ __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first ::std::true_type{}); } +template +_RandomAccessIterator +__pattern_is_heap_until(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + if (__last - __first < 2) + return __last; + + using _Predicate = + oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; + + return __par_backend_hetero::__parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}, + ::std::true_type{}); +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, From 0e0c0bfbffa2211772e9d858c71c11522975d901 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:02:53 +0100 Subject: [PATCH 094/566] __pattern_is_heap_until + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index f4a6cf48516..83dd2dfa00f 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1079,10 +1079,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator> is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - return 
oneapi::dpl::__internal::__pattern_is_heap_until( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + + return oneapi::dpl::__internal::__pattern_is_heap_until(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template From f5c35e2920fedeebbbbdd5d27aacccb242a336c2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:12:42 +0100 Subject: [PATCH 095/566] __pattern_is_partitioned + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 ++ include/oneapi/dpl/pstl/algorithm_impl.h | 106 ++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 32 ++++++ 3 files changed, 147 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index a2d586f4397..dc540ab2769 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -908,11 +908,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_is_partitioned(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, 
+ _UnaryPredicate); + //------------------------------------------------------------------------ // partition //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 742524bbc3f..ccc3cec86dc 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2586,6 +2586,15 @@ __pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardI return __internal::__brick_is_partitioned(__first, __last, __pred, __is_vector); } +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_is_partitioned(__first, __last, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -2683,6 +2692,103 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs }); } +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) +{ + //trivial pre-checks + if (__first == __last) + return true; + + return __internal::__except_handler([&]() { + // State of current range: + // broken - current range is not partitioned by pred + // all_true - all elements in current range satisfy pred + // all_false - all elements in current range don't satisfy pred + // true_false - elements satisfy pred are placed before elements that don't satisfy pred + enum _ReduceRes + { + __not_init = -1, + __broken, + __all_true, + __all_false, + __true_false + }; + // Array with states that we'll have when state from the left branch is 
merged with state from the right branch. + // State is calculated by formula: new_state = table[left_state * 4 + right_state] + const _ReduceRes __table[] = {__broken, __broken, __broken, __broken, __broken, __all_true, + __true_false, __true_false, __broken, __broken, __all_false, __broken, + __broken, __broken, __true_false, __broken}; + struct _ReduceType + { + _ReduceRes __val; + _RandomAccessIterator __pos; + }; + //a commutative combiner + auto __combine = [&__table](_ReduceType __x, _ReduceType __y) { + return __x.__pos > __y.__pos ? _ReduceType{__table[__y.__val * 4 + __x.__val], __y.__pos} + : _ReduceType{__table[__x.__val * 4 + __y.__val], __x.__pos}; + }; + + const _ReduceType __identity{__not_init, __last}; + + _ReduceType __result = __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, + [&__pred, __combine](_RandomAccessIterator __i, _RandomAccessIterator __j, + _ReduceType __value) -> _ReduceType { + if (__value.__val == __broken) + return _ReduceType{__broken, __i}; + + _ReduceType __res{__not_init, __i}; + // if first element satisfy pred + if (__pred(*__i)) + { + // find first element that don't satisfy pred + _RandomAccessIterator __x = + __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), _IsVector{}); + if (__x != __j) + { + // find first element after "x" that satisfy pred + _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, _IsVector{}); + // if it was found then range isn't partitioned by pred + if (__y != __j) + return _ReduceType{__broken, __i}; + + __res = _ReduceType{__true_false, __i}; + } + else + __res = _ReduceType{__all_true, __i}; + } + else + { // if first element doesn't satisfy pred + // then we should find the first element that satisfy pred. 
+ // If we found it then range isn't partitioned by pred + if (__internal::__brick_find_if(__i + 1, __j, __pred, _IsVector{}) != __j) + return _ReduceType{__broken, __i}; + + __res = _ReduceType{__all_false, __i}; + } + // if we have value from left range then we should calculate the result + return (__value.__val == __not_init) ? __res : __combine(__value, __res); + }, + + [__combine](_ReduceType __val1, _ReduceType __val2) -> _ReduceType { + if (__val1.__val == __not_init) + return __val2; + if (__val2.__val == __not_init) + return __val1; + assert(__val1.__val != __not_init && __val2.__val != __not_init); + + if (__val1.__val == __broken || __val2.__val == __broken) + return _ReduceType{__broken, __val1.__pos}; + // calculate the result for new big range + return __combine(__val1, __val2); + }); + return __result.__val != __broken; + }); +} + //------------------------------------------------------------------------ // partition //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 115b645e628..e5428eb37a0 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1558,6 +1558,38 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator return __broken != __reduce_fn(_ReduceValueType{__all_true}, __res); } +template +bool +__pattern_is_partitioned(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Predicate __predicate) +{ + if (__last - __first < 2) + return true; + + using _ReduceValueType = _IsPartitionedReduceType; + auto __reduce_fn = [](_ReduceValueType __a, _ReduceValueType __b) { + _ReduceValueType __table[] = {__broken, __broken, __broken, __broken, __broken, __all_true, + __true_false, __true_false, __broken, __broken, __all_false, __broken, + __broken, 
__broken, __true_false, __broken}; + return __table[__a * 4 + __b]; + }; + auto __transform_fn = [__predicate](auto __gidx, auto __acc) { + return (__predicate(__acc[__gidx]) ? __all_true : __all_false); + }; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf = __keep(__first, __last); + + auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, + ::std::false_type /*is_commutative*/>( + ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + unseq_backend::__no_init_value{}, // no initial value + __buf.all_view()) + .get(); + + return __broken != __reduce_fn(_ReduceValueType{__all_true}, __res); +} + //------------------------------------------------------------------------ // is_heap / is_heap_until //------------------------------------------------------------------------ From b099ad246ffc2a24fd5bc44452f9e6d2683096c5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:12:51 +0100 Subject: [PATCH 096/566] __pattern_is_partitioned + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 83dd2dfa00f..a909b7f9ca0 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -621,10 +621,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_is_partitioned( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, 
_ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_is_partitioned(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template From 98ca54c960e1238df607ce58bd8f2cc19e047643 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:18:02 +0100 Subject: [PATCH 097/566] __pattern_lexicographical_compare + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 20 ++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 48 +++++++++++++++++++ 3 files changed, 78 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index dc540ab2769..fd3d03debfa 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1609,6 +1609,11 @@ __pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1, _Forwar _ForwardIterator2, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> @@ -1616,6 +1621,11 @@ __pattern_lexicographical_compare(_ExecutionPolicy&&, _RandomAccessIterator1, _R _RandomAccessIterator2, _RandomAccessIterator2, _Compare, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator2, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h 
b/include/oneapi/dpl/pstl/algorithm_impl.h index ccc3cec86dc..6e6a8525a69 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5032,6 +5032,17 @@ __brick_lexicographical_compare(_RandomAccessIterator1 __first1, _RandomAccessIt } } +template +bool +__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> __pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -5041,6 +5052,15 @@ __pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1 __first1 return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, __is_vector); } +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _Compare __comp) noexcept +{ + return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, _IsVector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index e5428eb37a0..ce70301da0d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1956,6 +1956,54 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1 return __ret_idx ? 
__ret_idx == 1 : (__last1 - __first1) < (__last2 - __first2); } +template +bool +__pattern_lexicographical_compare(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, + _Iterator1 __last1, _Iterator2 __first2, _Iterator2 __last2, _Compare __comp) +{ + //trivial pre-checks + if (__first2 == __last2) + return false; + if (__first1 == __last1) + return true; + + using _Iterator1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; + using _ReduceValueType = int32_t; + + auto __reduce_fn = [](_ReduceValueType __a, _ReduceValueType __b) { + bool __is_mismatched = __a != 0; + return __a * __is_mismatched + __b * !__is_mismatched; + }; + auto __transform_fn = [__comp](auto __gidx, auto __acc1, auto __acc2) { + auto const& __s1_val = __acc1[__gidx]; + auto const& __s2_val = __acc2[__gidx]; + + ::std::int32_t __is_s1_val_less = __comp(__s1_val, __s2_val); + ::std::int32_t __is_s1_val_greater = __comp(__s2_val, __s1_val); + + // 1 if __s1_val < __s2_val, -1 if __s1_val > __s2_val, 0 if __s1_val == __s2_val + return _ReduceValueType{1 * __is_s1_val_less - 1 * __is_s1_val_greater}; + }; + + auto __shared_size = ::std::min(__last1 - __first1, (_Iterator1DifferenceType)(__last2 - __first2)); + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf1 = __keep1(__first1, __first1 + __shared_size); + + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __buf2 = __keep2(__first2, __first2 + __shared_size); + + auto __ret_idx = + oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, + ::std::false_type /*is_commutative*/>( + ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + unseq_backend::__no_init_value{}, // no initial value + __buf1.all_view(), __buf2.all_view()) + .get(); + + return __ret_idx ?
__ret_idx == 1 : (__last1 - __first1) < (__last2 - __first2); +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, From 0279692e72c5f1e5e9cf4166ff673b6814b0eeb2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:18:11 +0100 Subject: [PATCH 098/566] __pattern_lexicographical_compare + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index a909b7f9ca0..8ca191f2c4c 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1197,10 +1197,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_lexicographical_compare( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp); } template From 047a9cb96029b83db7b85767016ebb5b8a63cddb Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:35:44 +0100 Subject: [PATCH 099/566] __pattern_merge + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 12 ++++++ 
include/oneapi/dpl/pstl/algorithm_impl.h | 27 ++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 43 +++++++++++++++++++ 3 files changed, 82 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index fd3d03debfa..1a9a270c288 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1273,6 +1273,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_merge(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +_OutputIterator +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _OutputIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> @@ -1280,6 +1286,12 @@ __pattern_merge(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterato _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator3 +__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator3, _Compare); + //------------------------------------------------------------------------ // inplace_merge //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 6e6a8525a69..04b238611ec 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3793,6 +3793,18 @@ __pattern_merge(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, 
__comp, __is_vector); } +template +_OutputIterator +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> @@ -3809,6 +3821,21 @@ __pattern_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran return __d_first + (__last1 - __first1) + (__last2 - __first2); } +template +_RandomAccessIterator3 +__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __d_first, _Compare __comp) +{ + __par_backend::__parallel_merge( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, + [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, + _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, + _Compare __comp) { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, _IsVector{}); }); + return __d_first + (__last1 - __first1) + (__last2 - __first2); +} + //------------------------------------------------------------------------ // inplace_merge //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index ce70301da0d..3fbd8d8757f 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1725,6 +1725,49 @@ __pattern_merge(_ExecutionPolicy&& __exec, 
_Iterator1 __first1, _Iterator1 __las } return __d_first + __n; } + +template +_Iterator3 +__pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Iterator3 __d_first, _Compare __comp) +{ + auto __n1 = __last1 - __first1; + auto __n2 = __last2 - __first2; + auto __n = __n1 + __n2; + if (__n == 0) + return __d_first; + + //To consider the direct copying pattern call in case just one of sequences is empty. + if (__n1 == 0) + oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy( + ::std::forward<_ExecutionPolicy>(__exec)), + __first2, __last2, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + else if (__n2 == 0) + oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy( + ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + else + { + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf1 = __keep1(__first1, __last1); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __buf2 = __keep2(__first2, __last2); + + auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); + auto __buf3 = __keep3(__d_first, __d_first + __n); + + __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), + __buf2.all_view(), __buf3.all_view(), __comp) + .wait(); + } + return __d_first + __n; +} + //------------------------------------------------------------------------ // inplace_merge //------------------------------------------------------------------------ From 3f94595d567d9ad45a842cf12941377cf01b9c6b Mon Sep 17 00:00:00 2001 From: 
Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:36:21 +0100 Subject: [PATCH 100/566] __pattern_merge + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 10 +++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 8ca191f2c4c..165bb4e9ab2 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -922,10 +922,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __d_first, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __last2, __d_first, __comp); } template diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 3fbd8d8757f..05a80007649 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2526,11 +2526,13 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw __buf,__comp, 
unseq_backend::_DifferenceTag() ) - __buf; //2. Merge {1} and the difference + const auto __dispatch_tag1 = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first1), + decltype(__buf), decltype(__result)>(); return oneapi::dpl::__internal::__pattern_merge( + __dispatch_tag1, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __buf, __buf + __n_diff, __result, __comp, - /*vector=*/::std::true_type(), /*parallel=*/::std::true_type()); + __first1, __last1, __buf, __buf + __n_diff, __result, __comp); } //Dummy names to avoid kernel problems @@ -2619,7 +2621,9 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __buf_2; //3. Merge the differences - return oneapi::dpl::__internal::__pattern_merge(::std::forward<_ExecutionPolicy>(__exec), __buf_1, + constexpr auto __dispatch_tag1 = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_1), + decltype(__buf_2), decltype(__result)>(); + return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag1, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, __buf_1 + __n_diff_1, __buf_2, __buf_2 + __n_diff_2, __result, __comp, ::std::true_type(), ::std::true_type()); } From f0b93f2a4f037b4d7e73300d02d7ebde7cdc3933 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:48:35 +0100 Subject: [PATCH 101/566] __pattern_min_element + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++ include/oneapi/dpl/pstl/algorithm_impl.h | 40 +++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 58 +++++++++++++++++++ 3 files changed, 107 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 1a9a270c288..49d4a9460e8 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1542,11 +1542,20 @@ 
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_min_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_min_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + //------------------------------------------------------------------------ // minmax_element //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 04b238611ec..ad1f1937b5f 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4843,6 +4843,15 @@ __pattern_min_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIter return __internal::__brick_min_element(__first, __last, __comp, __is_vector); } +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_min_element(__first, __last, __comp, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -4874,6 +4883,37 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, }); } 
+template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + // a trivial case pre-check + if (__last - __first < 2) + return __first; + + return __internal::__except_handler([&]() { + return __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, + [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, + _RandomAccessIterator __init) -> _RandomAccessIterator { + const _RandomAccessIterator __subresult = + __internal::__brick_min_element(__begin, __end, __comp, _IsVector{}); + return __init == __last ? __subresult + : __internal::__cmp_iterators_by_values(__init, __subresult, __comp, + oneapi::dpl::__internal::__pstl_less()); + }, + [=](_RandomAccessIterator __it1, _RandomAccessIterator __it2) -> _RandomAccessIterator { + if (__it1 == __last) + return __it2; + if (__it2 == __last) + return __it1; + return __internal::__cmp_iterators_by_values(__it1, __it2, __comp, + oneapi::dpl::__internal::__pstl_less()); + }); + }); +} + //------------------------------------------------------------------------ // minmax_element //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 05a80007649..0472c13e8ed 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -706,6 +706,64 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ return __first + ::std::get<0>(__ret_idx); } +template +_Iterator +__pattern_min_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) +{ + if (__first == __last) + return __last; + + using _IteratorValueType = typename 
::std::iterator_traits<_Iterator>::value_type; + using _IndexValueType = ::std::make_unsigned_t::difference_type>; + using _ReduceValueType = tuple<_IndexValueType, _IteratorValueType>; + // Commutativity of the reduction operator depends on the compilation target (see __reduce_fn below); + // __spirv_target_conditional postpones deciding on commutativity to the device code where the + // target can be correctly tested. + using _Commutative = oneapi::dpl::__internal::__spirv_target_conditional; + auto __reduce_fn = [__comp](_ReduceValueType __a, _ReduceValueType __b) { + using ::std::get; + // TODO: Consider removing the non-commutative operator for SPIR-V targets when we see improved performance with the + // non-sequential load path in transform_reduce. + if constexpr (oneapi::dpl::__internal::__is_spirv_target_v) + { + // This operator doesn't track the lowest found index in case of equal min. or max. values. Thus, this operator is + // not commutative. + if (__comp(get<1>(__b), get<1>(__a))) + { + return __b; + } + return __a; + } + else + { + // This operator keeps track of the lowest found index in case of equal min. or max. values. Thus, this operator is + // commutative. 
+ bool _is_a_lt_b = __comp(get<1>(__a), get<1>(__b)); + bool _is_b_lt_a = __comp(get<1>(__b), get<1>(__a)); + + if (_is_b_lt_a || (!_is_a_lt_b && get<0>(__b) < get<0>(__a))) + { + return __b; + } + return __a; + } + }; + auto __transform_fn = [](auto __gidx, auto __acc) { return _ReduceValueType{__gidx, __acc[__gidx]}; }; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf = __keep(__first, __last); + + auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + unseq_backend::__no_init_value{}, // no initial value + __buf.all_view()) + .get(); + + return __first + ::std::get<0>(__ret_idx); +} + // TODO: // The following minmax_element implementation // has at worst 2N applications of the predicate From d7cc061ad64b43c61af8a61cdd2e31310370910d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 10:48:45 +0100 Subject: [PATCH 102/566] __pattern_min_element + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 165bb4e9ab2..0ccdda7065f 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1120,10 +1120,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_min_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, 
_ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_min_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template From 7ed0def8d66632be23ef80ecda98dc4a98d4fbe4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 11:01:23 +0100 Subject: [PATCH 103/566] __pattern_minmax_element + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 51 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 50 ++++++++++++++++++ 3 files changed, 110 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 49d4a9460e8..9af3f6b2176 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1575,12 +1575,21 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, __pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +::std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> __pattern_minmax_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /* is_parallel = */ ::std::true_type); +template +::std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + //------------------------------------------------------------------------ // mismatch 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index ad1f1937b5f..83efd6ffdc8 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4947,6 +4947,16 @@ __pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardI return __internal::__brick_minmax_element(__first, __last, __comp, __is_vector); } +template +::std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_minmax_element(__first, __last, __comp, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> @@ -4989,6 +4999,47 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs }); } +template +::std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + // a trivial case pre-check + if (__last - __first < 2) + return ::std::make_pair(__first, __first); + + return __internal::__except_handler([&]() { + typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; + + return __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + /*identity*/ ::std::make_pair(__last, __last), + [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { + const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, _IsVector{}); + if (__init.first == __last) // = identity + return __subresult; + return 
::std::make_pair( + __internal::__cmp_iterators_by_values(__init.first, __subresult.first, __comp, + oneapi::dpl::__internal::__pstl_less()), + __internal::__cmp_iterators_by_values(__init.second, __subresult.second, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), + oneapi::dpl::__internal::__pstl_greater())); + }, + [=, &__comp](_Result __p1, _Result __p2) -> _Result { + if (__p1.first == __last) + return __p2; + if (__p2.first == __last) + return __p1; + return ::std::make_pair( + __internal::__cmp_iterators_by_values(__p1.first, __p2.first, __comp, + oneapi::dpl::__internal::__pstl_less()), + __internal::__cmp_iterators_by_values(__p1.second, __p2.second, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), + oneapi::dpl::__internal::__pstl_greater())); + }); + }); +} + //------------------------------------------------------------------------ // mismatch //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 0472c13e8ed..23c9ef38e93 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -832,6 +832,56 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator return ::std::make_pair<_Iterator, _Iterator>(__first + ::std::get<0>(__ret), __first + ::std::get<1>(__ret)); } +template +::std::pair<_Iterator, _Iterator> +__pattern_minmax_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Compare __comp) +{ + if (__first == __last) + return ::std::make_pair(__first, __first); + + using _IteratorValueType = typename ::std::iterator_traits<_Iterator>::value_type; + using _IndexValueType = ::std::make_unsigned_t::difference_type>; + using _ReduceValueType = ::std::tuple<_IndexValueType, _IndexValueType, _IteratorValueType, _IteratorValueType>; + 
+ // This operator doesn't track the lowest found index in case of equal min. values and the highest found index in + // case of equal max. values. Thus, this operator is not commutative. + auto __reduce_fn = [__comp](_ReduceValueType __a, _ReduceValueType __b) { + using ::std::get; + auto __chosen_for_min = __a; + auto __chosen_for_max = __b; + + assert(get<0>(__a) < get<0>(__b)); + assert(get<1>(__a) < get<1>(__b)); + + if (__comp(get<2>(__b), get<2>(__a))) + __chosen_for_min = ::std::move(__b); + if (__comp(get<3>(__b), get<3>(__a))) + __chosen_for_max = ::std::move(__a); + return _ReduceValueType{get<0>(__chosen_for_min), get<1>(__chosen_for_max), get<2>(__chosen_for_min), + get<3>(__chosen_for_max)}; + }; + + // TODO: Doesn't work with `zip_iterator`. + // In that case the first and the second arguments of `_ReduceValueType` will be + // a `tuple` of `difference_type`, not the `difference_type` itself. + auto __transform_fn = [](auto __gidx, auto __acc) { + return _ReduceValueType{__gidx, __gidx, __acc[__gidx], __acc[__gidx]}; + }; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf = __keep(__first, __last); + + auto __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, + ::std::false_type /*is_commutative*/>( + ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + unseq_backend::__no_init_value{}, // no initial value + __buf.all_view()) + .get(); + + return ::std::make_pair<_Iterator, _Iterator>(__first + ::std::get<0>(__ret), __first + ::std::get<1>(__ret)); +} + //------------------------------------------------------------------------ // adjacent_find //------------------------------------------------------------------------ From 854b42cdb0239aa42efea7749d6791103981a3ad Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 11:01:34 +0100 Subject: [PATCH 104/566] __pattern_minmax_element + tag calls --- 
include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 0ccdda7065f..1ed979c2e74 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1155,10 +1155,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, ::std::pair<_ForwardIterator, _ForwardIterator>> minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_minmax_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_minmax_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template From 3543d130975fc5d00285549f3e4d6f57c3a9bac9 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 11:15:16 +0100 Subject: [PATCH 105/566] __pattern_mismatch + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 33 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 21 ++++++++++++ 3 files changed, 65 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 9af3f6b2176..54f4d61341e 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1612,6 +1612,11 @@ __pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _Fo _Predicate, _IsVector, /* is_parallel = */ 
::std::false_type) noexcept; +template +::std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Predicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, @@ -1619,6 +1624,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, __pattern_mismatch(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _Predicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Predicate); + //------------------------------------------------------------------------ // lexicographical_compare //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 83efd6ffdc8..bfe8f6ca0af 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5078,6 +5078,16 @@ __pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardItera return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, __is_vector); } +template +::std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, @@ -5100,6 
+5110,29 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ }); } +template +::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Predicate __pred) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&]() { + auto __n = ::std::min(__last1 - __first1, __last2 - __first2); + auto __result = __internal::__parallel_find( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), + __pred, _IsVector{}) + .first; + }, + ::std::true_type{}); + return ::std::make_pair(__result, __first2 + (__result - __first1)); + }); +} + //------------------------------------------------------------------------ // lexicographical_compare //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 23c9ef38e93..4cd97a19d20 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1429,6 +1429,27 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __ return ::std::make_pair(__first1 + __n, __first2 + __n); } +template +::std::pair<_Iterator1, _Iterator2> +__pattern_mismatch(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Pred __pred) +{ + auto __n = ::std::min(__last1 - __first1, __last2 - __first2); + if (__n <= 0) + 
return ::std::make_pair(__first1, __first2); + + using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, equal_predicate<_Pred>>; + + auto __first_zip = __par_backend_hetero::zip( + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2)); + auto __result = + __par_backend_hetero::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, + _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); + __n = __result - __first_zip; + return ::std::make_pair(__first1 + __n, __first2 + __n); +} + //------------------------------------------------------------------------ // copy_if //------------------------------------------------------------------------ From 4bf5073ade730a3e90a59a3a04982cd411d01e34 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 11:15:28 +0100 Subject: [PATCH 106/566] __pattern_mismatch + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 1ed979c2e74..2173b90d8c6 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -734,10 +734,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_mismatch( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, 
_ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_mismatch(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __pred); } template From 70a4146d59ae3c13206da3a66a1a1e9a5ecd9f52 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 12:20:38 +0100 Subject: [PATCH 107/566] __pattern_nth_element + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 53 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 15 ++++++ 3 files changed, 78 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 54f4d61341e..f45f54585d4 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1124,12 +1124,22 @@ __pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIt _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +void +__pattern_nth_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); + //------------------------------------------------------------------------ // fill, fill_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 
bfe8f6ca0af..b96eb4da63a 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3406,6 +3406,16 @@ __pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator __first, _Random ::std::nth_element(__first, __nth, __last, __comp); } +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + ::std::nth_element(__first, __nth, __last, __comp); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, @@ -3450,6 +3460,49 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, } while (__x != __nth); } +template +void +__pattern_nth_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) +{ + if (__first == __last || __nth == __last) + { + return; + } + + using ::std::iter_swap; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; + _RandomAccessIterator __x; + do + { + __x = __internal::__pattern_partition( + ::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, + [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, _IsVector{}, + /*is_parallel=*/::std::true_type()); + --__x; + if (__x != __first) + { + iter_swap(__first, __x); + } + // if x > nth then our new range for partition is [first, x) + if (__x - __nth > 0) + { + __last = __x; + } + // if x < nth then our new range for partition is [x, last) + else if (__x - __nth < 0) + { + // if *x == *nth then we start the new partition at the next index where *x != *nth + while (!__comp(*__nth, *__x) && !__comp(*__x, *__nth) && __x - __nth < 0) + { + ++__x; + } + 
iter_swap(__nth, __x); + __first = __x; + } + } while (__x != __nth); +} + //------------------------------------------------------------------------ // fill, fill_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 4cd97a19d20..10908ebd883 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2359,6 +2359,21 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ /*vector*/ ::std::true_type{}, /*parallel*/ ::std::true_type{}); } +template +void +__pattern_nth_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __nth, + _Iterator __last, _Compare __comp) +{ + if (__first == __last || __nth == __last) + return; + + // TODO: check partition-based implementation + // - try to avoid host dereference issue + // - measure performance of the issue-free implementation + __pattern_partial_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp, + /*vector*/ ::std::true_type{}, /*parallel*/ ::std::true_type{}); +} + //------------------------------------------------------------------------ // reverse //------------------------------------------------------------------------ From 442a9b91479cf2441c06a77c746dc47d86f89c3e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 12:20:52 +0100 Subject: [PATCH 108/566] __pattern_nth_element + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 2173b90d8c6..3051269d7f0 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1177,10 +1177,11 @@ 
oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_nth_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __nth, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + + oneapi::dpl::__internal::__pattern_nth_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __nth, __last, __comp); } template From 370bf3ee7d22e6deb97e8c85462485bbb27c21f3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 12:35:03 +0100 Subject: [PATCH 109/566] __pattern_partial_sort + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 32 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 16 ++++++++++ 3 files changed, 58 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index f45f54585d4..762fa0b813d 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1057,12 +1057,22 @@ __pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessI _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, 
_IsVector, /*is_parallel=*/::std::true_type); +template +void +__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); + //------------------------------------------------------------------------ // partial_sort_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index b96eb4da63a..9a274d7f3f3 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3158,6 +3158,16 @@ __pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _Rando ::std::partial_sort(__first, __middle, __last, __comp); } +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, + _RandomAccessIterator __last, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + ::std::partial_sort(__first, __middle, __last, __comp); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, @@ -3180,6 +3190,28 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, }); } +template +void +__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) +{ + const auto __n = __middle - __first; + if (__n == 0) + return; + + __except_handler([&]() { + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { + if (__n < __end - __begin) + ::std::partial_sort(__begin, __begin + __n, __end, __comp); + else + 
::std::sort(__begin, __end, __comp); + }, + __n); + }); +} + //------------------------------------------------------------------------ // partial_sort_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 10908ebd883..5ad1eda0ece 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2251,6 +2251,22 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator _ .wait(); } +template +oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> +__pattern_partial_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, + _Iterator __last, _Compare __comp) +{ + if (__last - __first < 2) + return; + + __par_backend_hetero::__parallel_partial_sort( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__last), __comp) + .wait(); +} + //------------------------------------------------------------------------ // partial_sort_copy //------------------------------------------------------------------------ From 2500d1e690eccca36b8d39e1dde7e28c390c29de Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 12:35:16 +0100 Subject: [PATCH 110/566] __pattern_partial_sort + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 
3051269d7f0..20e38400587 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -838,10 +838,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_partial_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + + oneapi::dpl::__internal::__pattern_partial_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last, __comp); } template diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 5ad1eda0ece..56df7e4a06c 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2368,11 +2368,12 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ if (__first == __last || __nth == __last) return; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + // TODO: check partition-based implementation // - try to avoid host dereference issue // - measure performance of the issue-free implementation - __pattern_partial_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp, - /*vector*/ ::std::true_type{}, /*parallel*/ ::std::true_type{}); + __pattern_partial_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp); } template @@ -2386,8 
+2387,7 @@ __pattern_nth_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec // TODO: check partition-based implementation // - try to avoid host dereference issue // - measure performance of the issue-free implementation - __pattern_partial_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp, - /*vector*/ ::std::true_type{}, /*parallel*/ ::std::true_type{}); + __pattern_partial_sort(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp); } //------------------------------------------------------------------------ From b415521fe0c6c806a5e8535e65e04f27708ae8f8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 13:26:18 +0100 Subject: [PATCH 111/566] __pattern_partial_sort_copy + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 ++ include/oneapi/dpl/pstl/algorithm_impl.h | 85 ++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 106 ++++++++++++++++-- 3 files changed, 193 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 762fa0b813d..97e8a1b3de3 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1084,6 +1084,11 @@ __pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomA _RandomAccessIterator2, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> @@ -1091,6 +1096,11 @@ __pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomA _RandomAccessIterator2, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator 
+__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare) noexcept; + //------------------------------------------------------------------------ // adjacent_find //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 9a274d7f3f3..20793e4f2c4 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3225,6 +3225,16 @@ __pattern_partial_sort_copy(_ExecutionPolicy&&, _ForwardIterator __first, _Forwa return ::std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); } +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return ::std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> @@ -3299,6 +3309,81 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ }); } +template +_RandomAccessIterator2 +__pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, + _RandomAccessIterator2 __d_last, _Compare __comp) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + if (__last == __first || __d_last == __d_first) + { + return __d_first; + } + auto __n1 = __last - __first; + auto __n2 = __d_last - __d_first; + return __internal::__except_handler([&]() { + if (__n2 >= __n1) + { + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + 
__n1, __comp, + [__first, __d_first](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp) { + _RandomAccessIterator1 __i1 = __first + (__i - __d_first); + _RandomAccessIterator1 __j1 = __first + (__j - __d_first); + + // 1. Copy elements from input to output + __brick_copy<_ExecutionPolicy>{}(__i1, __j1, __i, _IsVector{}); + // 2. Sort elements in output sequence + ::std::sort(__i, __j, __comp); + }, + __n1); + return __d_first + __n1; + } + else + { + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::value_type _T1; + typedef typename ::std::iterator_traits<_RandomAccessIterator2>::value_type _T2; + __par_backend::__buffer<_ExecutionPolicy, _T1> __buf(__n1); + _T1* __r = __buf.get(); + + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { + _RandomAccessIterator1 __it = __first + (__i - __r); + + // 1. Copy elements from input to raw memory + for (_T1* __k = __i; __k != __j; ++__k, ++__it) + { + ::new (__k) _T2(*__it); + } + + // 2. Sort elements in temporary buffer + if (__n2 < __j - __i) + ::std::partial_sort(__i, __i + __n2, __j, __comp); + else + ::std::sort(__i, __j, __comp); + }, + __n2); + + // 3. 
Move elements from temporary buffer to output + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, + [__r, __d_first](_T1* __i, _T1* __j) { + __brick_move_destroy<_ExecutionPolicy>{}( + __i, __j, __d_first + (__i - __r), _IsVector{}); + }); + + if constexpr (!::std::is_trivially_destructible_v<_T1>) + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, + __r + __n1, + [](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, _IsVector{}); }); + + return __d_first + __n2; + } + }); +} + //------------------------------------------------------------------------ // adjacent_find //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 56df7e4a06c..b4631412785 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2315,16 +2315,26 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI // as it uses a similar mechanism. if (__in_size <= __out_size) { + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), + decltype(__out_first)>(); + using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + // If our output buffer is larger than the input buffer, simply copy elements to the output and use // full sort on them. 
- auto __out_end = __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, __out_first, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + auto __out_end = __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( + __dispatch_tag1, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, + __out_first, __brick_copy<_ExecutionPolicy>{}); + + constexpr auto __dispatch_tag2 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__out_first), decltype(__out_end), + decltype(__out_first)>(); // Use reqular sort as partial_sort isn't required to be stable __pattern_sort( + __dispatch_tag2, __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), - __out_first, __out_end, __comp, ::std::true_type{}, ::std::true_type{}, ::std::true_type{}); + __out_first, __out_end, __comp, ::std::true_type{}); return __out_end; } @@ -2337,9 +2347,85 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __in_size); auto __buf_first = __buf.get(); - auto __buf_last = __pattern_walk2( + + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), + decltype(__buf_first)>(); + using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + + auto __buf_last = __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __brick_copy<_ExecutionPolicy>{}); + + auto __buf_mid = __buf_first + __out_size; + + __par_backend_hetero::__parallel_partial_sort( + __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), + 
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); + + constexpr auto __dispatch_tag2 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), + decltype(__out_first)>(); + + return __pattern_walk2( + __dispatch_tag2, + __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), + __buf_first, __buf_mid, __out_first, __brick_copy<_ExecutionPolicy>{}); + } +} + +template +_OutIterator +__pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _InIterator __first, + _InIterator __last, _OutIterator __out_first, _OutIterator __out_last, _Compare __comp) +{ + using _ValueType = typename ::std::iterator_traits<_InIterator>::value_type; + + auto __in_size = __last - __first; + auto __out_size = __out_last - __out_first; + + if (__in_size == 0 || __out_size == 0) + return __out_first; + + // TODO: we can avoid a separate __pattern_walk2 for initial copy: it can be done during sort itself + // like it's done for CPU version, but it's better to be done together with merge cutoff implementation + // as it uses a similar mechanism. + if (__in_size <= __out_size) + { + // If our output buffer is larger than the input buffer, simply copy elements to the output and use + // full sort on them. 
+ auto __out_end = __pattern_walk2<_BackendTag, /*_IsSync=*/::std::false_type>( + __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, __out_first, + __brick_copy<_ExecutionPolicy>{}); + + // Use regular sort as partial_sort isn't required to be stable + __pattern_sort( + __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), + __out_first, __out_end, __comp, ::std::true_type{}, ::std::true_type{}, ::std::true_type{}); + + return __out_end; + } + else + { + // If our input buffer is smaller than the input buffer do the following: + // - create a temporary buffer and copy all the elements from the input buffer there + // - run partial sort on the temporary buffer + // - copy k elements from the temporary buffer to the output buffer. + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __in_size); + + auto __buf_first = __buf.get(); + + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), + decltype(__buf_first)>(); + using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + + auto __buf_last = __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( + __dispatch_tag1, __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, + __buf_first, __brick_copy<_ExecutionPolicy>{}); auto __buf_mid = __buf_first + __out_size; @@ -2349,10 +2435,14 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); + constexpr auto __dispatch_tag2 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), + decltype(__out_first)>(); + return __pattern_walk2( + 
__dispatch_tag2, __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), - __buf_first, __buf_mid, __out_first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + __buf_first, __buf_mid, __out_first, __brick_copy<_ExecutionPolicy>{}); } } From 3c14631c6dfed1a8341dddeaa7ef5a7dd0507abc Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 13:37:15 +0100 Subject: [PATCH 112/566] __pattern_partial_sort_copy + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 20e38400587..5a96ceb30fe 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -861,10 +861,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomA partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator, _RandomAccessIterator>(); + return oneapi::dpl::__internal::__pattern_partial_sort_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _RandomAccessIterator>()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp); } template From 04df16cc91e7ebd6e15caafc05922294bd5b6e6c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 15:23:27 +0100 Subject: [PATCH 113/566] __pattern_partition + tag impls --- 
include/oneapi/dpl/pstl/algorithm_fwd.h | 9 ++ include/oneapi/dpl/pstl/algorithm_impl.h | 85 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 10 +++ 3 files changed, 104 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 97e8a1b3de3..2049400a571 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -939,11 +939,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_partition(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); + //------------------------------------------------------------------------ // stable_partition //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 20793e4f2c4..d8531ae1570 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2818,6 +2818,16 @@ __pattern_partition(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterat return __internal::__brick_partition(__first, __last, __pred, __is_vector); } +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept +{ + 
static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_partition(__first, __last, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -2886,6 +2896,81 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R }); } +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) +{ + // partitioned range: elements before pivot satisfy pred (true part), + // elements after pivot don't satisfy pred (false part) + struct _PartitionRange + { + _RandomAccessIterator __begin; + _RandomAccessIterator __pivot; + _RandomAccessIterator __end; + }; + + return __internal::__except_handler([&]() { + _PartitionRange __init{__last, __last, __last}; + + // lambda for merging two partitioned ranges to one partitioned range + auto __reductor = [&__exec](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { + auto __size1 = __val1.__end - __val1.__pivot; + auto __size2 = __val2.__pivot - __val2.__begin; + auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); + + // if all elements in left range satisfy pred then we can move new pivot to pivot of right range + if (__val1.__end == __val1.__pivot) + { + return {__new_begin, __val2.__pivot, __val2.__end}; + } + // if true part of right range greater than false part of left range + // then we should swap the false part of left range and last part of true part of right range + else if (__size2 > __size1) + { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), decltype(__val1.__pivot + __size1)>(); + using __backend_tag = typename 
decltype(__dispatch_tag)::__backend_tag; + + __par_backend::__parallel_for( + __backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, + [__val1, __val2, __size1](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), + _IsVector{}); + }); + return {__new_begin, __val2.__pivot - __size1, __val2.__end}; + } + // else we should swap the first part of false part of left range and true part of right range + else + { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), decltype(__val1.__pivot + __size2)>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + __par_backend::__parallel_for( + __backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, + [__val1, __val2](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), _IsVector{}); + }); + return {__new_begin, __val1.__pivot + __size2, __val2.__end}; + } + }; + + _PartitionRange __result = __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange { + //1. serial partition + _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, _IsVector{}); + + // 2. 
merging of two ranges (left and right respectively) + return __reductor(__value, {__i, __pivot, __j}); + }, + __reductor); + return __result.__pivot; + }); +} + //------------------------------------------------------------------------ // stable_partition //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index b4631412785..07855e3e796 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2075,6 +2075,16 @@ __pattern_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __la ::std::true_type(), ::std::true_type()); } +template +_Iterator +__pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _UnaryPredicate __pred) +{ + //TODO: consider nonstable approaches + return __pattern_stable_partition(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, + ::std::true_type(), ::std::true_type()); +} + //------------------------------------------------------------------------ // lexicographical_compare //------------------------------------------------------------------------ From 973e89c52e19c58121c1cca7615be23562af2c4a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 15:24:44 +0100 Subject: [PATCH 114/566] __pattern_partition + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 15 +++++++-------- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index d8531ae1570..629029f0a77 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3629,15 +3629,16 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, 
return; } + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using ::std::iter_swap; typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; _RandomAccessIterator __x; do { - __x = __internal::__pattern_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, - [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, __is_vector, - /*is_parallel=*/::std::true_type()); + __x = __internal::__pattern_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first + 1, + __last, + [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }); --__x; if (__x != __first) { @@ -3677,10 +3678,8 @@ __pattern_nth_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec _RandomAccessIterator __x; do { - __x = __internal::__pattern_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, - [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, _IsVector{}, - /*is_parallel=*/::std::true_type()); + __x = __internal::__pattern_partition(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, + [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }); --__x; if (__x != __first) { diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 5a96ceb30fe..1b7b0c8317e 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -631,10 +631,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, 
_ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template From eaddf2111b33fbd65c4ad5cb32e90a24aae112ce Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 15:53:15 +0100 Subject: [PATCH 115/566] __pattern_partition_copy + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 12 +++++ include/oneapi/dpl/pstl/algorithm_impl.h | 50 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 25 ++++++++++ 3 files changed, 87 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 2049400a571..9b380d672b8 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -999,6 +999,12 @@ __pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, /*is_parallelization=*/::std::false_type) noexcept; +template +::std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, + _OutputIterator2, _UnaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, @@ -1007,6 +1013,12 @@ __pattern_partition_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAcces _OutputIterator2, _UnaryPredicate, _IsVector, /*is_parallelization=*/::std::true_type); +template +::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator3, _UnaryPredicate); + 
//------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 629029f0a77..8f910352492 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3090,6 +3090,18 @@ __pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardI return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); } +template +::std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, @@ -3130,6 +3142,44 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); } +template +::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true, + _RandomAccessIterator3 __out_false, _UnaryPredicate __pred) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + typedef ::std::pair<_DifferenceType, _DifferenceType> _ReturnType; + const _DifferenceType __n = __last - __first; + if (_DifferenceType(1) < __n) + { + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); + return 
__internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __pred, &__mask_buf]() { + bool* __mask = __mask_buf.get(); + _ReturnType __m{}; + __par_backend::__parallel_strict_scan( + ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), + __mask + __i, __pred, _IsVector{}); + }, + [](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType { + return ::std::make_pair(__x.first + __y.first, __x.second + __y.second); + }, // Combine + [=](_DifferenceType __i, _DifferenceType __len, _ReturnType __initial) { // Scan + __internal::__brick_partition_by_mask(__first + __i, __first + (__i + __len), + __out_true + __initial.first, __out_false + __initial.second, + __mask + __i, _IsVector{}); + }, + [&__m](_ReturnType __total) { __m = __total; }); + return ::std::make_pair(__out_true + __m.first, __out_false + __m.second); + }); + } + // trivial sequence - use serial algorithm + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, _IsVector{}); +} + //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 07855e3e796..10097adeb11 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1561,6 +1561,31 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato return ::std::make_pair(__result1 + __result.second, __result2 + (__last - __first - __result.second)); } +template +::std::pair<_Iterator2, _Iterator3> +__pattern_partition_copy(__hetero_tag<_BackendTag> __tag, 
_ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result1, _Iterator3 __result2, _UnaryPredicate __pred) +{ + if (__first == __last) + return ::std::make_pair(__result1, __result2); + + using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; + using _ReduceOp = ::std::plus<_It1DifferenceType>; + + unseq_backend::__create_mask<_UnaryPredicate, _It1DifferenceType> __create_mask_op{__pred}; + unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}}; + + auto __result = __pattern_scan_copy( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend_hetero::zip( + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)), + __create_mask_op, __copy_by_mask_op); + + return ::std::make_pair(__result1 + __result.second, __result2 + (__last - __first - __result.second)); +} + //------------------------------------------------------------------------ // unique_copy //------------------------------------------------------------------------ From 7398bf4e44b1f9726b477eef30ef1a5f57750278 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 15:54:49 +0100 Subject: [PATCH 116/566] __pattern_partition_copy + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 1b7b0c8317e..21cd4dec70c 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -655,10 +655,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, partition_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, 
_ForwardIterator __last, _ForwardIterator1 __out_true, _ForwardIterator2 __out_false, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_partition_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __out_true, __out_false, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator, + _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_partition_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __out_true, __out_false, __pred); } // [alg.sort] diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 10097adeb11..694aa4bf0fc 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2066,8 +2066,10 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterat auto __true_result = __true_buf.get(); auto __false_result = __false_buf.get(); - auto copy_result = __pattern_partition_copy(__exec, __first, __last, __true_result, __false_result, __pred, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + constexpr auto __dispatch_tag = __select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), + decltype(__true_result), decltype(__false_result)>(); + auto copy_result = + __pattern_partition_copy(__dispatch_tag, __exec, __first, __last, __true_result, __false_result, __pred); auto true_count = copy_result.first - __true_result; //TODO: optimize copy back if possible (inplace, decrease number of submits) From c51a27f5304f5250c64a0a7b433aa66cd081d0ca Mon Sep 
17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:13:57 +0100 Subject: [PATCH 117/566] __pattern_remove_if + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 32 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 31 ++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 9b380d672b8..8bfc1b911e8 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1299,11 +1299,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, /*is_parallel*/ ::std::false_type) noexcept; +template +_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_remove_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, /*is_parallel*/ ::std::true_type); +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); + //------------------------------------------------------------------------ // merge //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 8f910352492..e3c6a38d7df 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4042,6 +4042,16 @@ __pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterat return __internal::__brick_remove_if(__first, __last, __pred, __is_vector); } +template 
+_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_remove_if(__first, __last, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_remove_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -4064,6 +4074,28 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R __is_vector); } +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; + + if (__first == __last || __first + 1 == __last) + { + // Trivial sequence - use serial algorithm + return __internal::__brick_remove_if(__first, __last, __pred, _IsVector{}); + } + + return __internal::__remove_elements( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { + __internal::__brick_walk2( + __b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, _IsVector{}); + }, + _IsVector{}); +} + //------------------------------------------------------------------------ // merge //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 694aa4bf0fc..8e89a62eb2b 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1640,6 +1640,37 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __la __copy_first, 
__copy_last, __first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); } +template +_Iterator +__pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __pred) +{ + if (__last == __first) + return __last; + + using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; + + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); + auto __copy_first = __buf.get(); + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), + decltype(__copy_first)>(); + + auto __copy_last = + __pattern_copy_if(__dispatch_tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); + + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), + decltype(__first)>(); + + //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer + return __pattern_walk2( + __dispatch_tag1, + __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}); +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> __pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __pred, From b16634793da330d224a9cd93e6475f74d5a903b9 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:14:11 +0100 Subject: [PATCH 118/566] __pattern_remove_if + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 21cd4dec70c..a4276ff2f0d 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -514,10 +514,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_remove_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_remove_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template From fddf7bcf2637caa120bd73911022f1443aca98be Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:30:46 +0100 Subject: [PATCH 119/566] 
__pattern_reverse + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 8 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 21 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 20 ++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 8bfc1b911e8..8652b22d920 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -817,11 +817,19 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_reverse(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _IsVector, /*is_parallel=*/::std::true_type); +template +void +__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator); + //------------------------------------------------------------------------ // reverse_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index e3c6a38d7df..6d18e4630eb 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2294,6 +2294,15 @@ __pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator __first, _Bidirecti __internal::__brick_reverse(__first, __last, _is_vector); } +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + __internal::__brick_reverse(__first, __last, typename 
_Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_reverse(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -2306,6 +2315,18 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran }); } +template +void +__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last) +{ + __par_backend::__parallel_for( + ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, + [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { + __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), _IsVector{}); + }); +} + //------------------------------------------------------------------------ // reverse_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 8e89a62eb2b..19da3a640d6 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2569,6 +2569,26 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last .wait(); } +template +void +__pattern_reverse(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + auto __n = __last - __first; + if (__n <= 0) + return; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __buf = __keep(__first, __last); + oneapi::dpl::__par_backend_hetero::__parallel_for( + __backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::__reverse_functor::difference_type>{__n}, __n / 2, + 
__buf.all_view()) + .wait(); +} + //------------------------------------------------------------------------ // reverse_copy //------------------------------------------------------------------------ From e88097890013db6bfe5388a4b8a6b37953754a57 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:30:56 +0100 Subject: [PATCH 120/566] __pattern_reverse + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index a4276ff2f0d..6ab9208f939 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -575,10 +575,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last) { - oneapi::dpl::__internal::__pattern_reverse( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator>(); + + oneapi::dpl::__internal::__pattern_reverse(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last); } template From a43668a12c3ee17b6945d074cc5b4b160e643931 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:40:39 +0100 Subject: [PATCH 121/566] __pattern_reverse_copy + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 27 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 24 +++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h 
b/include/oneapi/dpl/pstl/algorithm_fwd.h index 8652b22d920..7732b7d3847 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -847,11 +847,21 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _OutputIterator, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _OutputIterator) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> __pattern_reverse_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2); + //------------------------------------------------------------------------ // rotate //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 6d18e4630eb..ced2b512d75 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2359,6 +2359,16 @@ __pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator __first, _Bidi return __internal::__brick_reverse_copy(__first, __last, __d_first, __is_vector); } +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _OutputIterator __d_first) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_reverse_copy(__first, __last, __d_first, typename _Tag::__is_vector{}); +} + template 
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> __pattern_reverse_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, @@ -2375,6 +2385,23 @@ __pattern_reverse_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first return __d_first + __len; } +template +_RandomAccessIterator2 +__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + auto __len = __last - __first; + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) { + __internal::__brick_reverse_copy(__inner_first, __inner_last, + __d_first + (__len - (__inner_last - __first)), _IsVector{}); + }); + return __d_first + __len; +} + //------------------------------------------------------------------------ // rotate //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 19da3a640d6..87de9cec1fb 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2616,6 +2616,30 @@ __pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first return __result + __n; } +template +_ForwardIterator +__pattern_reverse_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __last, _ForwardIterator __result) +{ + auto __n = __last - __first; + if (__n <= 0) + return __result; + + auto __keep1 = + 
oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _BidirectionalIterator>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); + auto __buf2 = __keep2(__result, __result + __n); + oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::__reverse_copy::difference_type>{__n}, + __n, __buf1.all_view(), __buf2.all_view()) + .wait(); + + return __result + __n; +} + //------------------------------------------------------------------------ // rotate //------------------------------------------------------------------------ From 37dd88e11fc02df2c0e681a74ef00da3963b2c1a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:40:59 +0100 Subject: [PATCH 122/566] __pattern_reverse_copy + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 6ab9208f939..c0b07fc6317 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -587,10 +587,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _ForwardIterator __d_first) { - return oneapi::dpl::__internal::__pattern_reverse_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, 
_BidirectionalIterator, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_reverse_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __d_first); } // [alg.rotate] From 2a7da4662bc003e04f4e4d6047d8545d69d4030b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:55:27 +0100 Subject: [PATCH 123/566] __pattern_rotate + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++ include/oneapi/dpl/pstl/algorithm_impl.h | 73 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 36 +++++++++ 3 files changed, 118 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 7732b7d3847..d36abc46f8e 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -879,11 +879,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_rotate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_rotate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator); + //------------------------------------------------------------------------ // rotate_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index ced2b512d75..2d7e9713541 100644 --- 
a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2462,6 +2462,15 @@ __pattern_rotate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator return __internal::__brick_rotate(__first, __middle, __last, __is_vector); } +template +_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_rotate(__first, __middle, __last, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, @@ -2524,6 +2533,70 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand } } +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; + auto __n = __last - __first; + auto __m = __middle - __first; + if (__m <= __n / 2) + { + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n - __m); + return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, &__buf]() { + _Tp* __result = __buf.get(); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__middle, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_uninitialized_move( + __b, __e, __result + (__b - __middle), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__last, __middle](_RandomAccessIterator __b, 
_RandomAccessIterator __e) { + __internal::__brick_move<_ExecutionPolicy>{}( + __b, __e, __b + (__last - __middle), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + (__n - __m), [__first, __result](_Tp* __b, _Tp* __e) { + __brick_move_destroy<_ExecutionPolicy>{}( + __b, __e, __first + (__b - __result), _IsVector{}); + }); + + return __first + (__last - __middle); + }); + } + else + { + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m); + return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, &__buf]() { + _Tp* __result = __buf.get(); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__first, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_uninitialized_move( + __b, __e, __result + (__b - __first), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__first, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_move<_ExecutionPolicy>{}( + __b, __e, __first + (__b - __middle), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + __m, [__n, __m, __first, __result](_Tp* __b, _Tp* __e) { + __brick_move_destroy<_ExecutionPolicy>{}( + __b, __e, __first + ((__n - __m) + (__b - __result)), _IsVector{}); + }); + + return __first + (__last - __middle); + }); + } +} + //------------------------------------------------------------------------ // rotate_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 87de9cec1fb..9c162c26226 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ 
b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2687,6 +2687,42 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_f return __first + (__last - __new_first); } +template +_Iterator +__pattern_rotate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, + _Iterator __last) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + auto __n = __last - __first; + if (__n <= 0) + return __first; + + using _Tp = typename ::std::iterator_traits<_Iterator>::value_type; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __buf = __keep(__first, __last); + auto __temp_buf = oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _Tp>(__exec, __n); + + auto __temp_rng = + oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::write>(__temp_buf.get_buffer()); + + const auto __shift = __new_first - __first; + oneapi::dpl::__par_backend_hetero::__parallel_for( + __backend_tag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), + unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, + __buf.all_view(), __temp_rng); + + using _Function = __brick_move<_ExecutionPolicy>; + auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; + + oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __brick, __n, __temp_rng, __buf.all_view()) + .wait(); + + return __first + (__last - __new_first); +} + //------------------------------------------------------------------------ // rotate_copy //------------------------------------------------------------------------ From 2e467001283c532408e82547d27a7eb525abe0ef Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 16:55:39 +0100 Subject: [PATCH 124/566] __pattern_rotate + tag calls --- 
include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index c0b07fc6317..40ca4ecb2d6 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -600,10 +600,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { - return oneapi::dpl::__internal::__pattern_rotate( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_rotate(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last); } template From ece56b26c1c194f04da7c4b1c046a71166786493 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 17:06:07 +0100 Subject: [PATCH 125/566] __pattern_rotate_copy + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 +++++ include/oneapi/dpl/pstl/algorithm_impl.h | 44 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 30 +++++++++++++ 3 files changed, 84 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index d36abc46f8e..1639f795928 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -912,12 +912,22 @@ __pattern_rotate_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _F _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator 
+__pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, + _OutputIterator) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> __pattern_rotate_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2); + //------------------------------------------------------------------------ // is_partitioned //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 2d7e9713541..9fbcf8265dc 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2629,6 +2629,17 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forw __result, __is_vector); } +template +_OutputIterator +__pattern_rotate_copy(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last, _OutputIterator __result) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_rotate_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, + __result, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> __pattern_rotate_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, @@ -2660,6 +2671,39 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, return __result + (__last - __first); } +template +_RandomAccessIterator2 +__pattern_rotate_copy(__parallel_tag<_IsVector>, 
_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + __par_backend::__parallel_for( + __backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + __internal::__brick_copy<_ExecutionPolicy> __copy{}; + if (__b > __middle) + { + __copy(__b, __e, __result + (__b - __middle), _IsVector{}); + } + else + { + _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); + if (__e < __middle) + { + __copy(__b, __e, __new_result, _IsVector{}); + } + else + { + __copy(__b, __middle, __new_result, _IsVector{}); + __copy(__middle, __e, __result, _IsVector{}); + } + } + }); + return __result + (__last - __first); +} + //------------------------------------------------------------------------ // is_partitioned //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 9c162c26226..c2fad196ee1 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2755,6 +2755,36 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, return __result + __n; } +template +_ForwardIterator +__pattern_rotate_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __new_first, _BidirectionalIterator __last, _ForwardIterator __result) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + auto __n = __last - __first; + if (__n <= 0) + return __result; + + auto __keep1 = + 
oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _BidirectionalIterator>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); + auto __buf2 = __keep2(__result, __result + __n); + + const auto __shift = __new_first - __first; + + oneapi::dpl::__par_backend_hetero::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::__rotate_copy::difference_type>{__n, + __shift}, + __n, __buf1.all_view(), __buf2.all_view()) + .wait(); + + return __result + __n; +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> From 668271be4aa6ff24bb1bd34688b76ed5af886cf7 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 17:06:19 +0100 Subject: [PATCH 126/566] __pattern_rotate_copy + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 40ca4ecb2d6..bc97670f8a8 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -611,10 +611,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __middle, _ForwardIterator1 __last, _ForwardIterator2 __result) { - return oneapi::dpl::__internal::__pattern_rotate_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __result, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + 
oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_rotate_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __middle, __last, __result); } // [alg.partitions] From a7aed3937dcf14e413dea6c62a6476099a6e3b84 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 17:17:27 +0100 Subject: [PATCH 127/566] __pattern_search + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 39 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 29 ++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 1639f795928..d246538403c 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -577,6 +577,11 @@ __pattern_search(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _Forw _BinaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> @@ -584,6 +589,12 @@ __pattern_search(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterat _RandomAccessIterator2, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); + //------------------------------------------------------------------------ // search_n //------------------------------------------------------------------------ diff --git 
a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 9fbcf8265dc..2dbbaf6f26d 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1462,6 +1462,16 @@ __pattern_search(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, __is_vector); } +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> @@ -1496,6 +1506,35 @@ __pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ran } } +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + if (__last - __first == __s_last - __s_first) + { + const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); + return __res ? 
__first : __last; + } + else + { + return __internal::__except_handler([&]() { + return __internal::__parallel_find( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, + _IsVector{}); + }, + /*_IsFirst=*/::std::true_type{}); + }); + } +} + //------------------------------------------------------------------------ // search_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index c2fad196ee1..99c46da8f16 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1357,6 +1357,35 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las ::std::true_type{}); } +template +_Iterator1 +__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) +{ + if (__s_last == __s_first) + return __first; + + if (__last - __first < __s_last - __s_first) + return __last; + + if (__last - __first == __s_last - __s_first) + { + const bool __res = __pattern_equal( + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __first, __last, __s_first, __pred); + return __res ? 
__first : __last; + } + + using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; + return __par_backend_hetero::__parallel_find( + ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_last), _Predicate{__pred}, + ::std::true_type{}); +} + //------------------------------------------------------------------------ // search_n //------------------------------------------------------------------------ From 6dce79e1f30ae1f00a52bfecb63ebae3f5e6d9bd Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 17:20:01 +0100 Subject: [PATCH 128/566] __pattern_search + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index bc97670f8a8..c06b4a9ba4e 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -229,10 +229,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_search( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag 
= + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_search(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __s_first, __s_last, __pred); } template From cdd519334f1d55934ce8af2c538bf8896cfcf9a3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 17:45:29 +0100 Subject: [PATCH 129/566] __pattern_search_n + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 39 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 28 +++++++++++++ 3 files changed, 78 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index d246538403c..d53131d1e0c 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -616,6 +616,11 @@ __pattern_search_n(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, + _BinaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> @@ -623,6 +628,12 @@ __pattern_search_n(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessItera _BinaryPredicate, IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Size, + const _Tp&, _BinaryPredicate); + //------------------------------------------------------------------------ // copy_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 2dbbaf6f26d..461ef02a777 100644 --- 
a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1564,6 +1564,16 @@ __pattern_search_n(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterato return __internal::__brick_search_n(__first, __last, __count, __value, __pred, __is_vector); } +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, + const _Tp& __value, _BinaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_search_n(__first, __last, __count, __value, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> @@ -1592,6 +1602,35 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra } } +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + if (static_cast<_Size>(__last - __first) == __count) + { + const bool __result = + !__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }); + return __result ? 
__first : __last; + } + else + { + return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred]() { + return __internal::__parallel_find( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __count, &__value, __pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, _IsVector{}); + }, + ::std::true_type{}); + }); + } +} + + //------------------------------------------------------------------------ // copy_n //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 99c46da8f16..61b5fc06646 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1433,6 +1433,34 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __las _Predicate{__pred, __value, __count}, ::std::true_type{}); } +template +_Iterator +__pattern_search_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Size __count, const _Tp& __value, _BinaryPredicate __pred) +{ + if (__count <= 0) + return __first; + + if (__last - __first < __count) + return __last; + + if (__last - __first == __count) + { + return (!__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred})) + ? 
__first + : __last; + } + + using _Predicate = unseq_backend::n_elem_match_pred<_ExecutionPolicy, _BinaryPredicate, _Tp, _Size>; + return __par_backend_hetero::__parallel_find( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + _Predicate{__pred, __value, __count}, ::std::true_type{}); +} + //------------------------------------------------------------------------ // mismatch //------------------------------------------------------------------------ From 302c456f0a64caed54ecab58f864f48339c5566e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 6 Feb 2024 17:46:38 +0100 Subject: [PATCH 130/566] __pattern_search_n + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index c06b4a9ba4e..0eec9137088 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -250,10 +250,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_search_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __count, __value, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_search_n(__dispatch_tag, 
::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __count, __value, __pred); } template From 1871dd6cf6b0a97a56a9a57dd2d84ee54e005775 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 09:59:53 +0100 Subject: [PATCH 131/566] __pattern_set_difference + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 12 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 65 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 25 +++++++ 3 files changed, 102 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index d53131d1e0c..0d3afa5a85c 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1547,6 +1547,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> @@ -1554,6 +1560,12 @@ __pattern_set_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAcce _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator3, _Compare); + //------------------------------------------------------------------------ // set_symmetric_difference //------------------------------------------------------------------------ diff --git 
a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 461ef02a777..366561640a3 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5094,6 +5094,19 @@ __pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Forwar return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); } +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> @@ -5148,6 +5161,58 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); } +template +_RandomAccessIterator3 +__pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __result, _Compare __comp) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + // {} \ {2}: the difference is empty + if (__n1 == 0) + return __result; + + // {1} \ {}: parallel copying just first sequence + if (__n2 == 0) + return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, 
+ __internal::__brick_copy<_ExecutionPolicy>{}); + + // testing whether the sequences are intersected + _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence + if (__left_bound_seq_1 == __last1) + return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + __internal::__brick_copy<_ExecutionPolicy>{}); + + // testing whether the sequences are intersected + _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence + if (__left_bound_seq_2 == __last2) + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __brick_copy<_ExecutionPolicy>{}); + + if (__n1 + __n2 > __set_algo_cut_off) + return __parallel_set_op( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType) { return __n; }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, + __comp, __BrickCopyConstruct<_IsVector>()); + }, + _IsVector{}); + + // use serial algorithm + return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); +} + //------------------------------------------------------------------------ // set_symmetric_difference //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 61b5fc06646..a28a3da8531 100644 --- 
a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2956,6 +2956,31 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, __result, __comp, unseq_backend::_DifferenceTag()); } +template +_OutputIterator +__pattern_set_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) +{ + // {} \ {2}: the difference is empty + if (__first1 == __last1) + return __result; + + // {1} \ {}: the difference is {1} + if (__first2 == __last2) + { + return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_copy_case_1>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + } + + return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, + __result, __comp, unseq_backend::_DifferenceTag()); +} + //Dummy names to avoid kernel problems template class __set_union_copy_case_1 From a4ad88e08cd6228831bfe22eb09db3699e13dbf8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:00:15 +0100 Subject: [PATCH 132/566] __pattern_set_difference + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 0eec9137088..e5a7dc854c9 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1044,10 +1044,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 
__last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_set_difference(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template From 9575ac3faf11494dd846bbabfd2fd71c584dec00 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:12:23 +0100 Subject: [PATCH 133/566] __pattern_set_intersection + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 13 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 77 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 15 ++++ 3 files changed, 105 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 0d3afa5a85c..4774d55213e 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1520,6 +1520,12 @@ __pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterat _ForwardIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + template 
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> @@ -1527,6 +1533,13 @@ __pattern_set_intersection(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAc _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_intersection(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _RandomAccessIterator3, _Compare); + //------------------------------------------------------------------------ // set_difference //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 366561640a3..34e00c2e251 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4996,6 +4996,19 @@ __pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Forw return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); } +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> @@ -5061,6 +5074,70 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f return ::std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp); } +template +_RandomAccessIterator3 
+__pattern_set_intersection(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + // intersection is empty + if (__n1 == 0 || __n2 == 0) + return __result; + + // testing whether the sequences are intersected + _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + //{1} < {2}: seq 2 is wholly greater than seq 1, so, the intersection is empty + if (__left_bound_seq_1 == __last1) + return __result; + + // testing whether the sequences are intersected + _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + //{2} < {1}: seq 1 is wholly greater than seq 2, so, the intersection is empty + if (__left_bound_seq_2 == __last2) + return __result; + + const auto __m1 = __last1 - __left_bound_seq_1 + __n2; + if (__m1 > __set_algo_cut_off) + { + //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) + return __internal::__parallel_set_op( + ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, + __result, __comp); + }, + _IsVector{}); + } + + const auto __m2 = __last2 - __left_bound_seq_2 + __n1; + if (__m2 > 
__set_algo_cut_off) + { + //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) + __result = __internal::__parallel_set_op( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, + __result, __comp); + }, + _IsVector{}); + return __result; + } + + // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm + return ::std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp); +} + //------------------------------------------------------------------------ // set_difference //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index a28a3da8531..479800c16dc 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2922,6 +2922,21 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1 __result, __comp, unseq_backend::_IntersectionTag()); } +template +_OutputIterator +__pattern_set_intersection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) +{ + // intersection is empty + if (__first1 == __last1 || __first2 == __last2) + return __result; + + return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, + __result, __comp, 
unseq_backend::_IntersectionTag()); +} + //Dummy names to avoid kernel problems template class __set_difference_copy_case_1 From 22e757aac474dca124ee8976a0e1b50c5f7385ee Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:12:33 +0100 Subject: [PATCH 134/566] __pattern_set_intersection + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index e5a7dc854c9..0ef89eae00c 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1021,10 +1021,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_intersection( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_set_intersection(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template From bb130a376fb2fdc995a9d14a3a0de52a235716d5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:25:56 +0100 Subject: [PATCH 135/566] __pattern_set_symmetric_difference + tag impls --- 
include/oneapi/dpl/pstl/algorithm_fwd.h | 13 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 39 +++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 64 ++++++++++++++++++- 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 4774d55213e..f81eeea1f32 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1600,6 +1600,12 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1, _Forwa _ForwardIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> @@ -1607,6 +1613,13 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _ _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_symmetric_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _RandomAccessIterator3, _Compare); + //------------------------------------------------------------------------ // is_heap_until //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 34e00c2e251..2d27bc0fd14 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5325,6 +5325,19 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1 __first __is_vector); } +template +_OutputIterator 
+__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> @@ -5352,6 +5365,32 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessItera __is_vector); } +template +_RandomAccessIterator3 +__pattern_set_symmetric_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __result, _Compare __comp) +{ + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + // use serial algorithm + if (__n1 + __n2 <= __set_algo_cut_off) + return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); + + typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; + return __internal::__parallel_set_union_op( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_symmetric_difference_construct( + __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); + }, + _IsVector{}); +} + //------------------------------------------------------------------------ // is_heap_until //------------------------------------------------------------------------ diff --git 
a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 479800c16dc..45560162836 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3150,7 +3150,69 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 decltype(__buf_2), decltype(__result)>(); return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag1, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, __buf_1 + __n_diff_1, __buf_2, __buf_2 + __n_diff_2, __result, - __comp, ::std::true_type(), ::std::true_type()); + __comp); +} + +template +_OutputIterator +__pattern_set_symmetric_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, + _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp) +{ + if (__first1 == __last1 && __first2 == __last2) + return __result; + + //{1} is empty + if (__first1 == __last1) + { + return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_1>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + } + + //{2} is empty + if (__first2 == __last2) + { + return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_2>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + } + + typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; + + // temporary buffers to store intermediate result + const auto __n1 = __last1 - __first1; + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __diff_1(__exec, 
__n1); + auto __buf_1 = __diff_1.get(); + const auto __n2 = __last2 - __first2; + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __diff_2(__exec, __n2); + auto __buf_2 = __diff_2.get(); + + //1. Calc difference {1} \ {2} + const auto __n_diff_1 = + oneapi::dpl::__internal::__pattern_hetero_set_op( + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), + __first1, __last1, __first2, __last2, __buf_1, __comp, unseq_backend::_DifferenceTag()) - + __buf_1; + + //2. Calc difference {2} \ {1} + const auto __n_diff_2 = + oneapi::dpl::__internal::__pattern_hetero_set_op( + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), + __first2, __last2, __first1, __last1, __buf_2, __comp, unseq_backend::_DifferenceTag()) - + __buf_2; + + //3. Merge the differences + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_1), + decltype(__buf_2), decltype(__result)>(); + return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, + __buf_1 + __n_diff_1, __buf_2, __buf_2 + __n_diff_2, __result, + __comp); } template From 7bc65aa5b92106b68685b8d074e361cada8b1ec2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:26:18 +0100 Subject: [PATCH 136/566] __pattern_set_symmetric_difference + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 0ef89eae00c..063349837ff 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1070,10 +1070,12 @@ set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, 
_Compare __comp) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator>(); + return oneapi::dpl::__internal::__pattern_set_symmetric_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp); } template From d14cbf6536974cff7ce8fcfc1e2b5f059eed6d5e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:41:18 +0100 Subject: [PATCH 137/566] __pattern_set_union + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 12 +++++ include/oneapi/dpl/pstl/algorithm_impl.h | 38 +++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 53 +++++++++++++++++++ 3 files changed, 103 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index f81eeea1f32..e60f8c82b10 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1493,12 +1493,24 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> 
__pattern_set_union(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +_OutputIterator +__pattern_set_union(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare); + //------------------------------------------------------------------------ // set_intersection //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 2d27bc0fd14..b6117538af8 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4936,6 +4936,19 @@ __pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIter return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); } +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> @@ -4962,6 +4975,31 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, __is_vector); } +template +_OutputIterator +__pattern_set_union(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp) +{ + const auto __n1 = __last1 - 
__first1; + const auto __n2 = __last2 - __first2; + + // use serial algorithm + if (__n1 + __n2 <= __set_algo_cut_off) + return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); + + typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; + return __parallel_set_union_op( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, + __BrickCopyConstruct<_IsVector>()); + }, + _IsVector{}); +} + //------------------------------------------------------------------------ // set_intersection //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 45560162836..4c5475ac68a 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3060,6 +3060,59 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw __first1, __last1, __buf, __buf + __n_diff, __result, __comp); } +template +_OutputIterator +__pattern_set_union(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) +{ + if (__first1 == __last1 && __first2 == __last2) + return __result; + + //{1} is empty + if (__first1 == __last1) + { + return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_1>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first2, __last2, __result, 
oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + } + + //{2} is empty + if (__first2 == __last2) + { + return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + } + + typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; + + // temporary buffer to store intermediate result + const auto __n2 = __last2 - __first2; + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __diff(__exec, __n2); + auto __buf = __diff.get(); + + //1. Calc difference {2} \ {1} + const auto __n_diff = + oneapi::dpl::__internal::__pattern_hetero_set_op(__exec, __first2, __last2, __first1, __last1, __buf, __comp, + unseq_backend::_DifferenceTag()) - + __buf; + + //2. Merge {1} and the difference + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first1), + decltype(__buf), decltype(__result)>(); + return oneapi::dpl::__internal::__pattern_merge( + __dispatch_tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __buf, __buf + __n_diff, __result, __comp); +} + //Dummy names to avoid kernel problems template class __set_symmetric_difference_copy_case_1 From 12f9c2821890bd136fcdd96c2fabdcc7d4d1b2c8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:42:16 +0100 Subject: [PATCH 138/566] __pattern_set_union + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 063349837ff..1e0a7ef92ed 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ 
b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -998,10 +998,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_union( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_set_union(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template From c33acee85fb5d2b2fdc49ddb99b4e336a431675e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:56:35 +0100 Subject: [PATCH 139/566] __pattern_shift_left + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 55 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 20 +++++++ 3 files changed, 85 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index e60f8c82b10..2946cbd7f5c 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1862,12 +1862,22 @@ __pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator 
+__pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, + typename ::std::iterator_traits<_ForwardIterator>::difference_type) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> __pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, /*is_parallel=*/::std::true_type); +template +_ForwardIterator +__pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, + typename ::std::iterator_traits<_ForwardIterator>::difference_type); + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> __pattern_shift_right(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index b6117538af8..4ac553ab79d 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -6148,6 +6148,16 @@ __pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardItera return __brick_shift_left(__first, __last, __n, __is_vector); } +template +_ForwardIterator +__pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + typename ::std::iterator_traits<_ForwardIterator>::difference_type __n) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __brick_shift_left(__first, __last, __n, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, @@ -6192,6 +6202,51 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa return __first + __size_res; } +template +_ForwardIterator 
+__pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, typename ::std::iterator_traits<_ForwardIterator>::difference_type __n) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. + if (__n <= 0) + return __last; + auto __size = __last - __first; + if (__n >= __size) + return __first; + + using _DiffType = typename ::std::iterator_traits<_ForwardIterator>::difference_type; + + _DiffType __mid = __size / 2 + __size % 2; + _DiffType __size_res = __size - __n; + + //1. n >= size/2; there is enough memory to 'total' parallel copying + if (__n >= __mid) + { + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __size, + [__first, __n](_DiffType __i, _DiffType __j) { + __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, + __first + __i - __n, _IsVector{}); + }); + } + else //2. 
n < size/2; there is not enough memory to parallel copying; doing parallel copying by n elements + { + //TODO: to consider parallel processing by the 'internal' loop (but we may probably get cache locality issues) + for (auto __k = __n; __k < __size; __k += __n) + { + auto __end = ::std::min(__k + __n, __size); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __k, __end, + [__first, __n](_DiffType __i, _DiffType __j) { + __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, + __first + __i - __n, _IsVector{}); + }); + } + } + + return __first + __size_res; +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> __pattern_shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 4c5475ac68a..2dcf2ccc8b2 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3358,6 +3358,26 @@ __pattern_shift_right(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ return __last - __res; } +template +_Iterator +__pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + typename ::std::iterator_traits<_Iterator>::difference_type __n) +{ + //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. 
+ auto __size = __last - __first; + if (__n <= 0) + return __last; + if (__n >= __size) + return __first; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __buf = __keep(__first, __last); + + auto __res = + oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __n); + return __first + __res; +} + } // namespace __internal } // namespace dpl } // namespace oneapi From 7660a96c5f140c6c505ed563f9ebbb73dbbef08a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 10:56:46 +0100 Subject: [PATCH 140/566] __pattern_shift_left + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 1e0a7ef92ed..376c39fef56 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1235,10 +1235,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, typename ::std::iterator_traits<_ForwardIterator>::difference_type __n) { - return oneapi::dpl::__internal::__pattern_shift_left( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_shift_left(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __n); } // [shift.right] From 2e12b116291fe90c4ccfa3e711ca03f89d44ee77 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> 
Date: Wed, 7 Feb 2024 11:20:01 +0100 Subject: [PATCH 141/566] __pattern_shift_right + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 20 +++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 22 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 4ac553ab79d..46771e8cf72 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -6261,6 +6261,26 @@ __pattern_shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, return __res.base(); } +template +_BidirectionalIterator +__pattern_shift_right(_Tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, + typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) +{ + static_assert(__is_backend_tag_v<_Tag>); + + using _ReverseIterator = typename ::std::reverse_iterator<_BidirectionalIterator>; + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(_ReverseIterator(__last)), + decltype(_ReverseIterator(__first))>(); + + auto __res = + oneapi::dpl::__internal::__pattern_shift_left(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + _ReverseIterator(__last), _ReverseIterator(__first), __n); + + return __res.base(); +} + } // namespace __internal } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 2dcf2ccc8b2..9dffa37b99f 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3358,6 +3358,28 @@ __pattern_shift_right(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ return __last - __res; } +template +_Iterator +__pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator 
__last, + typename ::std::iterator_traits<_Iterator>::difference_type __n) +{ + //If (n > 0 && n < m), returns first + n. Otherwise, if n > 0, returns last. Otherwise, returns first. + auto __size = __last - __first; + if (__n <= 0) + return __first; + if (__n >= __size) + return __last; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __buf = __keep(__first, __last); + + //A shift right is the shift left with a reverse logic. + auto __rng = oneapi::dpl::__ranges::reverse_view_simple{__buf.all_view()}; + auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __rng, __n); + + return __last - __res; +} + template _Iterator __pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, From 6caf1f20b0d938a786450f1bc5d6d7a92e69ef8d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 11:20:15 +0100 Subject: [PATCH 142/566] __pattern_shift_right + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 376c39fef56..9bd59d079a7 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -1248,10 +1248,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Bidirec shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) { - return oneapi::dpl::__internal::__pattern_shift_right( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator>(); + + return oneapi::dpl::__internal::__pattern_shift_right(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __n); } } // namespace dpl From 6c4634c165472c68d173fa7e8853b6d467ea901e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 11:33:35 +0100 Subject: [PATCH 143/566] __pattern_sort + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 26 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 9 +++++++ 3 files changed, 45 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 2946cbd7f5c..26c0c17a261 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1088,12 +1088,22 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type, _IsMoveConstructible) noexcept; +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, + _IsMoveConstructible) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type); +template +void +__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, + /*is_move_constructible=*/::std::true_type); + //------------------------------------------------------------------------ // stable_sort 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 46771e8cf72..12e119ce397 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3436,6 +3436,16 @@ __pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessI ::std::sort(__first, __last, __comp); } +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + _IsMoveConstructible) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + ::std::sort(__first, __last, __comp); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, @@ -3451,6 +3461,22 @@ __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random }); } +template +void +__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, + /*is_move_constructible=*/::std::true_type) +{ + __internal::__except_handler([&]() { + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::sort(__first, __last, __comp); + }, + __last - __first); + }); +} + //------------------------------------------------------------------------ // stable_sort //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 9dffa37b99f..f76b3f9a660 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ 
b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2100,6 +2100,15 @@ __pattern_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _ oneapi::dpl::identity{}); } +template +void +__pattern_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp, /*is_move_constructible=*/::std::true_type) +{ + __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + oneapi::dpl::identity{}); +} + //------------------------------------------------------------------------ // stable_sort //------------------------------------------------------------------------ From 08bd8ba9dc63e6bbfc67e0f9320833de9ea3b2fa Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 11:34:25 +0100 Subject: [PATCH 144/566] __pattern_sort + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 7 +++++-- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 9bd59d079a7..d187b575446 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -672,11 +672,14 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _InputType; + oneapi::dpl::__internal::__pattern_sort( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), typename ::std::is_move_constructible<_InputType>::type()); } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index f76b3f9a660..39a8a039bd6 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2536,9 +2536,12 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __brick_copy<_ExecutionPolicy>{}); // Use regular sort as partial_sort isn't required to be stable - __pattern_sort( + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__out_first), decltype(__out_end)>(); + + __pattern_sort(__dispatch_tag, __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), - __out_first, __out_end, __comp, ::std::true_type{}, ::std::true_type{}, ::std::true_type{}); + __out_first, __out_end, __comp, ::std::true_type{}); return __out_end; } From 8c29d9245b155f94b7ba9a6c9fe243c27a4414fc Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 11:45:24 +0100 Subject: [PATCH 145/566] __pattern_sort_by_key + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 12 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 42 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 14 +++++++ 3 files changed, 68 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 26c0c17a261..0be83b0cd80 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1130,6 +1130,12 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, _IsVector /*vector=*/, 
/*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_sort_by_key(_Tag, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> @@ -1137,6 +1143,12 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, _IsVector /*vector=*/, /*is_parallel=*/::std::true_type); +template +void +__pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Compare); + //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 12e119ce397..025e442cbb1 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3524,6 +3524,24 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f ::std::sort(__beg, __end, __cmp_f); } +template +void +__pattern_sort_by_key(_Tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, + _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); + auto __end = __beg + (__keys_last - __keys_first); + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; + + ::std::sort(__beg, __end, __cmp_f); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> @@ -3549,6 +3567,30 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 
__keys_f }); } +template +void +__pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, + _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp) +{ + static_assert( + ::std::is_move_constructible_v::value_type> && + ::std::is_move_constructible_v::value_type>, + "The keys and values should be move constructible in case of parallel execution."); + + auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); + auto __end = __beg + (__keys_last - __keys_first); + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; + + __internal::__except_handler([&]() { + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, + [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); + }); +} + //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 39a8a039bd6..141b989fc91 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2137,6 +2137,20 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _Iterator1 __keys_first, _Itera [](const auto& __a) { return ::std::get<0>(__a); }); } +template +void +__pattern_sort_by_key(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __keys_first, + _Iterator1 __keys_last, _Iterator2 __values_first, _Compare __comp) +{ + static_assert(::std::is_move_constructible_v::value_type> && + ::std::is_move_constructible_v::value_type>, + "The keys and values should be move constructible in case of parallel execution."); + + auto __beg = 
oneapi::dpl::make_zip_iterator(__keys_first, __values_first); + auto __end = __beg + (__keys_last - __keys_first); + __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, + [](const auto& __a) { return ::std::get<0>(__a); }); +} template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> From 22cc335b5813ef4cc63f31cb2f9184a174764a5b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 11:45:52 +0100 Subject: [PATCH 146/566] __pattern_sort_by_key + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index d187b575446..aba552030e0 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -719,10 +719,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp) { - oneapi::dpl::__internal::__pattern_sort_by_key( - ::std::forward<_ExecutionPolicy>(__exec), __keys_first, __keys_last, __values_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + + oneapi::dpl::__internal::__pattern_sort_by_key(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __keys_first, __keys_last, __values_first, __comp); } template From 38ea27e2d335b3b1744926529f9a0e18509913c6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> 
Date: Wed, 7 Feb 2024 12:00:53 +0100 Subject: [PATCH 147/566] __pattern_stable_partition + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 10 +++ include/oneapi/dpl/pstl/algorithm_impl.h | 61 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 46 ++++++++++++++ 3 files changed, 117 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 0be83b0cd80..60b02a29dc8 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1030,11 +1030,21 @@ __pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator, _Bidirect _IsVector, /*is_parallelization=*/::std::false_type) noexcept; +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _UnaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_stable_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, /*is_parallelization=*/::std::true_type); +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); + //------------------------------------------------------------------------ // partition_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 025e442cbb1..0744dbe33d0 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3244,6 +3244,16 @@ __pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator __first, _ return __internal::__brick_stable_partition(__first, __last, __pred, __is_vector); } +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, 
_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _UnaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_stable_partition(__first, __last, __pred, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -3296,6 +3306,57 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi }); } +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) +{ + // partitioned range: elements before pivot satisfy pred (true part), + // elements after pivot don't satisfy pred (false part) + struct _PartitionRange + { + _RandomAccessIterator __begin; + _RandomAccessIterator __pivot; + _RandomAccessIterator __end; + }; + + return __internal::__except_handler([&]() { + _PartitionRange __init{__last, __last, __last}; + + // lambda for merging two partitioned ranges to one partitioned range + auto __reductor = [](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { + auto __size1 = __val1.__end - __val1.__pivot; + auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); + + // if all elements in left range satisfy pred then we can move new pivot to pivot of right range + if (__val1.__end == __val1.__pivot) + { + return {__new_begin, __val2.__pivot, __val2.__end}; + } + // if true part of right range greater than false part of left range + // then we should swap the false part of left range and last part of true part of right range + else + { + __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, _IsVector{}); + return {__new_begin, __val2.__pivot - __size1, __val2.__end}; + } + }; 
+ + _PartitionRange __result = __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [&__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange { + //1. serial stable_partition + _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, _IsVector{}); + + // 2. merging of two ranges (left and right respectively) + return __reductor(__value, {__i, __pivot, __j}); + }, + __reductor); + return __result.__pivot; + }); +} + //------------------------------------------------------------------------ // partition_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 141b989fc91..03a96684233 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2203,6 +2203,52 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterat return __first + true_count; } +template +_Iterator +__pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _UnaryPredicate __pred) +{ + if (__last == __first) + return __last; + else if (__last - __first < 2) + return __pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred) ? 
__last + : __first; + + using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; + + auto __n = __last - __first; + + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __true_buf(__exec, __n); + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __false_buf(__exec, __n); + auto __true_result = __true_buf.get(); + auto __false_result = __false_buf.get(); + + constexpr auto __dispatch_tag = __select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), + decltype(__true_result), decltype(__false_result)>(); + auto copy_result = + __pattern_partition_copy(__dispatch_tag, __exec, __first, __last, __true_result, __false_result, __pred); + auto true_count = copy_result.first - __true_result; + + //TODO: optimize copy back if possible (inplace, decrease number of submits) + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__true_result), + decltype(copy_result.first), decltype(__first)>(); + using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( + __dispatch_tag1, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, + copy_result.first, __first, __brick_move<_ExecutionPolicy>{}); + + constexpr auto __dispatch_tag2 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__false_result), + decltype(copy_result.second), decltype(__first + true_count)>(); + __pattern_walk2( + __dispatch_tag2, + __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __false_result, copy_result.second, __first + true_count, __brick_move<_ExecutionPolicy>{}); + + return __first + true_count; +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> __pattern_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred, From 
a481bf5e739f6dd0d2fbb7c6d1b4b06324765914 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:01:44 +0100 Subject: [PATCH 148/566] __pattern_stable_partition + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index aba552030e0..27d8afeaa30 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -646,10 +646,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Bidirec stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_stable_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator>(); + + return oneapi::dpl::__internal::__pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template (); + //TODO: consider nonstable approaches - return __pattern_stable_partition(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - ::std::true_type(), ::std::true_type()); + return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); } template @@ -2264,9 +2265,10 @@ _Iterator __pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred) { + constexpr auto 
__dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + //TODO: consider nonstable approaches - return __pattern_stable_partition(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - ::std::true_type(), ::std::true_type()); + return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); } //------------------------------------------------------------------------ From ed0cab97743318cf619ec8561a9d2758ba3fe28a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:15:20 +0100 Subject: [PATCH 149/566] __pattern_stable_sort + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 24 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 9 +++++++ 3 files changed, 42 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 60b02a29dc8..73220412b0c 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1124,12 +1124,21 @@ __pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIt _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type); +template +void +__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + //------------------------------------------------------------------------ // sort_by_key //------------------------------------------------------------------------ diff --git 
a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 0744dbe33d0..9ca164bbdb2 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3550,6 +3550,15 @@ __pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _Random ::std::stable_sort(__first, __last, __comp); } +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + ::std::stable_sort(__first, __last, __comp); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -3565,6 +3574,21 @@ __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, }); } +template +void +__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + __internal::__except_handler([&]() { + __par_backend::__parallel_stable_sort( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::stable_sort(__first, __last, __comp); + }, + __last - __first); + }); +} + //------------------------------------------------------------------------ // sort_by_key //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 38f734668dc..cf284bc7c3f 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2121,6 +2121,15 @@ __pattern_stable_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ oneapi::dpl::identity{}); 
} +template +void +__pattern_stable_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) +{ + __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + oneapi::dpl::identity{}); +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> __pattern_sort_by_key(_ExecutionPolicy&& __exec, _Iterator1 __keys_first, _Iterator1 __keys_last, From 8f6de3844d9adf2a3c8e72268fbbcf075a7ac2e0 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:15:30 +0100 Subject: [PATCH 150/566] __pattern_stable_sort + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 27d8afeaa30..abbff064991 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -698,10 +698,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + + oneapi::dpl::__internal::__pattern_stable_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __comp); } template From 832e93d701ffc418e25082f618494d3fe98e16f1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:25:47 +0100 Subject: [PATCH 
151/566] __pattern_swap + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 10 ++++++++++ .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 9ca164bbdb2..eee3f82927a 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -6224,6 +6224,16 @@ __pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIt __is_parallel); } +template +_ForwardIterator2 +__pattern_swap(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function __f) +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); +} + //------------------------------------------------------------------------ // shift_left //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index cf284bc7c3f..05087fe6bf1 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -204,6 +204,17 @@ __pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIt __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); } +template +_ForwardIterator2 +__pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) +{ + return __pattern_walk2<_BackendTag, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read_write, + __par_backend_hetero::access_mode::read_write>( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, 
__f); +} + //------------------------------------------------------------------------ // walk3 //------------------------------------------------------------------------ From 68f6ad1f08a830c1a7661726f439bf241e145c3d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:26:12 +0100 Subject: [PATCH 152/566] __pattern_swap + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index abbff064991..a332a039c1c 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -313,14 +313,15 @@ swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardItera { typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - return oneapi::dpl::__internal::__pattern_swap( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, - [](_ReferenceType1 __x, _ReferenceType2 __y) { - using ::std::swap; - swap(__x, __y); - }, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_swap(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, [](_ReferenceType1 __x, _ReferenceType2 __y) { + using ::std::swap; + swap(__x, __y); + }); } // [alg.transform] From d94d02b47ca269e76515fca6e721d3dc142dd4ee Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:41:10 +0100 Subject: [PATCH 
153/566] __pattern_unique + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++++ include/oneapi/dpl/pstl/algorithm_impl.h | 35 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 28 +++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 73220412b0c..d9210c80d9e 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -775,11 +775,20 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Fo __pattern_unique(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> __pattern_unique(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate); + //------------------------------------------------------------------------ // unique_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index eee3f82927a..bf6d352e6a6 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2096,6 +2096,15 @@ __pattern_unique(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator return __internal::__brick_unique(__first, __last, __pred, __is_vector); } +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, 
_BinaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_unique(__first, __last, __pred, typename _Tag::__is_vector{}); +} + // That function is shared between two algorithms - remove_if (__pattern_remove_if) and unique (pattern unique). But a mask calculation is different. // So, a caller passes _CalcMask brick into remove_elements. template @@ -2204,6 +2213,32 @@ __pattern_unique(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand __is_vector); } +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; + + if (__first == __last) + { + return __last; + } + if (__first + 1 == __last || __first + 2 == __last) + { + // Trivial sequence - use serial algorithm + return __internal::__brick_unique(__first, __last, __pred, _IsVector{}); + } + return __internal::__remove_elements( + ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, + [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { + __internal::__brick_walk3( + __b, __e, __it - 1, __it, + [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, _IsVector{}); + }, + _IsVector{}); +} + //------------------------------------------------------------------------ // unique_copy //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 05087fe6bf1..747f0d59380 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1767,6 +1767,34 @@ __pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, __copy_first, __copy_last, __first, 
__brick_copy<_ExecutionPolicy>{}); } +template +_Iterator +__pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __pred) +{ + if (__last - __first < 2) + return __last; + + using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; + + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); + auto __copy_first = __buf.get(); + auto __copy_last = __pattern_unique_copy(__exec, __first, __last, __copy_first, __pred, + /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + + constexpr auto __dispatch_tag1 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), + decltype(__first)>(); + using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + + //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer + return __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read_write, + __par_backend_hetero::access_mode::read_write>( + __dispatch_tag1, + __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}); +} + //------------------------------------------------------------------------ // is_partitioned //------------------------------------------------------------------------ From 8f9bc07177659d93498eb217499fd14334de2bfb Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:41:37 +0100 Subject: [PATCH 154/566] __pattern_unique + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index a332a039c1c..adc9f6f4702 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ 
b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -538,10 +538,10 @@ template unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_unique( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_unique(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } template From ff9bb256839399d363c09137c67847c7ce11141a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:56:10 +0100 Subject: [PATCH 155/566] __pattern_unique_copy + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 11 ++++ include/oneapi/dpl/pstl/algorithm_impl.h | 59 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 19 ++++++ 3 files changed, 89 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index d9210c80d9e..b9893fa63df 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -807,6 +807,11 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryPredicate, _IsVector, /*parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryPredicate) noexcept; + template _DifferenceType __brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate, @@ -823,6 +828,12 @@ 
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_unique_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _BinaryPredicate, _IsVector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _BinaryPredicate); + //------------------------------------------------------------------------ // reverse //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index bf6d352e6a6..d49e89bf990 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2272,6 +2272,16 @@ __pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIter return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); } +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _BinaryPredicate __pred) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_unique_copy(__first, __last, __result, __pred, typename _Tag::__is_vector{}); +} + template _DifferenceType __brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask, @@ -2344,6 +2354,55 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); } +template +_RandomAccessIterator2 +__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred) +{ + typedef typename 
::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + const _DifferenceType __n = __last - __first; + if (_DifferenceType(2) < __n) + { + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); + if (_DifferenceType(2) < __n) + { + return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() { + bool* __mask = __mask_buf.get(); + _DifferenceType __m{}; + __par_backend::__parallel_strict_scan( + ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce + _DifferenceType __extra = 0; + if (__i == 0) + { + // Special boundary case + __mask[__i] = true; + if (--__len == 0) + return 1; + ++__i; + ++__extra; + } + return __internal::__brick_calc_mask_2<_DifferenceType>(__first + __i, __first + (__i + __len), + __mask + __i, __pred, _IsVector{}) + + __extra; + }, + ::std::plus<_DifferenceType>(), // Combine + [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan + // Phase 2 is same as for __pattern_copy_if + __internal::__brick_copy_by_mask( + __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{}); + }, + [&__m](_DifferenceType __total) { __m = __total; }); + return __result + __m; + }); + } + } + // trivial sequence - use serial algorithm + return __internal::__brick_unique_copy(__first, __last, __result, __pred, _IsVector{}); +} + //------------------------------------------------------------------------ // reverse //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 747f0d59380..32654de3adc 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ 
-1676,6 +1676,25 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 return __result_first + __result.second; } +template +_Iterator2 +__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __result_first, _BinaryPredicate __pred) +{ + using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; + unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign, + /*inclusive*/ ::std::true_type, 1> + __copy_by_mask_op; + __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ + __not_pred<_BinaryPredicate>{__pred}}; + + auto __result = __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first, + __create_mask_op, __copy_by_mask_op); + + return __result_first + __result.second; +} + template class copy_back_wrapper { From f93e8675bd1dc3a508a2216c7c88ebaedfeee7b5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 12:57:07 +0100 Subject: [PATCH 156/566] __pattern_unique_copy + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 9 +++++---- .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 12 ++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index adc9f6f4702..9236c9f22ff 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -557,10 +557,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_unique_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - 
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_unique_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __pred); } template diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 32654de3adc..ed2ff5efc7a 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1770,8 +1770,10 @@ __pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto __copy_last = __pattern_unique_copy(__exec, __first, __last, __copy_first, __pred, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + auto __copy_last = + __pattern_unique_copy(oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), + decltype(__last), decltype(__copy_first)>(), + __exec, __first, __last, __copy_first, __pred); constexpr auto __dispatch_tag1 = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), @@ -1798,8 +1800,10 @@ __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto __copy_last = __pattern_unique_copy(__exec, __first, __last, __copy_first, __pred, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + auto __copy_last 
= + __pattern_unique_copy(oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), + decltype(__last), decltype(__copy_first)>(), + __exec, __first, __last, __copy_first, __pred); constexpr auto __dispatch_tag1 = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), From 072914fa3f5d92a08e2f6ee09b5e0b782a1dcae2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 13:21:08 +0100 Subject: [PATCH 157/566] __pattern_walk3_transform_if + tag impls --- include/oneapi/dpl/pstl/algorithm_fwd.h | 6 ++++++ include/oneapi/dpl/pstl/algorithm_impl.h | 13 +++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 17 +++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index b9893fa63df..ec2cd79a135 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -394,6 +394,12 @@ __pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, _IsVector __is_vector, _IsParallel __is_parallel) noexcept; +template +_ForwardIterator3 +__pattern_walk3_transform_if(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator3, _Function) noexcept; + //------------------------------------------------------------------------ // equal //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index d49e89bf990..705b8d278f6 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -948,6 +948,19 @@ __pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs __is_vector, __is_parallel); } +template +_ForwardIterator3 +__pattern_walk3_transform_if(_Tag __tag, 
_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, + _Function __func) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __pattern_walk3(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __first3, + __func); +} + //------------------------------------------------------------------------ // equal //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index ed2ff5efc7a..4e72e3af667 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -480,6 +480,23 @@ __pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); } +template +_ForwardIterator3 +__pattern_walk3_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, + _Function __func) +{ + // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming + // values of the output sequence for elements where the predicate is false. 
+ return __pattern_walk3<_BackendTag, __par_backend_hetero::access_mode::read, + __par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read_write>( + __tag, + __par_backend_hetero::make_wrapped_policy<__walk3_transform_if_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), + __first1, __last1, __first2, __first3, __func); +} + //------------------------------------------------------------------------ // fill //------------------------------------------------------------------------ From 81b8a5749ede28dc4117505b28cd0aac13fa03e5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 13:21:28 +0100 Subject: [PATCH 158/566] __pattern_walk3_transform_if + tag calls --- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 9236c9f22ff..19b0208acca 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -377,12 +377,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __result, _BinaryOperation __op, _BinaryPredicate __pred) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator3>(); + return oneapi::dpl::__internal::__pattern_walk3_transform_if( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, - oneapi::dpl::__internal::__transform_if_binary_functor<_BinaryOperation, _BinaryPredicate>(::std::move(__op), - ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3>(), - 
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3>()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + oneapi::dpl::__internal::__transform_if_binary_functor<_BinaryOperation, _BinaryPredicate>( + ::std::move(__op), ::std::move(__pred))); } // [alg.replace] From 8bf565ce406d47a839fb3905c07ec9aaceaab67d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 14:49:57 +0100 Subject: [PATCH 159/566] __pattern_transform_reduce + tag impls --- .../dpl/pstl/hetero/numeric_impl_hetero.h | 53 +++++++++++++++ include/oneapi/dpl/pstl/numeric_fwd.h | 36 ++++++++++ include/oneapi/dpl/pstl/numeric_impl.h | 65 +++++++++++++++++++ 3 files changed, 154 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 90cf7764f4d..e581a66546c 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -66,6 +66,35 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f .get(); } +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +{ + if (__first1 == __last1) + return __init; + + using _Functor = unseq_backend::walk_n<_ExecutionPolicy, _BinaryOperation2>; + using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; + + auto __n = __last1 - __first1; + auto __keep1 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator1>(); + auto __buf1 = __keep1(__first1, __last1); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, 
_RandomAccessIterator2>(); + auto __buf2 = __keep2(__first2, __first2 + __n); + + return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, + ::std::true_type /*is_commutative*/>( + ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + unseq_backend::__init_value<_RepackedTp>{__init}, // initial value + __buf1.all_view(), __buf2.all_view()) + .get(); +} + //------------------------------------------------------------------------ // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ @@ -94,6 +123,30 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, .get(); } +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) +{ + if (__first == __last) + return __init; + + using _Functor = unseq_backend::walk_n<_ExecutionPolicy, _UnaryOperation>; + using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator>(); + auto __buf = __keep(__first, __last); + + return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, + ::std::true_type /*is_commutative*/>( + ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + unseq_backend::__init_value<_RepackedTp>{__init}, // initial value + __buf.all_view()) + .get(); +} + //------------------------------------------------------------------------ // transform_scan //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index aab7edc0150..e060327043d 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ 
b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -50,6 +50,12 @@ __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterat _BinaryOperation1, _BinaryOperation2, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> @@ -57,6 +63,12 @@ __pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAc _Tp, _BinaryOperation1, _BinaryOperation2, _IsVector __is_vector, /*is_parallel=*/::std::true_type); +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2); + //------------------------------------------------------------------------ // transform_reduce (version with unary and binary functions) //------------------------------------------------------------------------ @@ -76,6 +88,12 @@ __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterato _UnaryOperation, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2 __bnary_op2) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> @@ -83,6 +101,24 @@ __pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAcc _UnaryOperation, _IsVector, /*is_parallel=*/::std::true_type); +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2); + +template +_Tp 
+__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, + _UnaryOperation) noexcept; + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Tp, _BinaryOperation, _UnaryOperation); + //------------------------------------------------------------------------ // transform_exclusive_scan // diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 8e65580918c..8253106a7c5 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -72,6 +72,19 @@ __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Forw return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); } +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, + _BinaryOperation2 __binary_op2) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> @@ -95,6 +108,29 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f }); } +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +{ + return __internal::__except_handler([&]() { + return __par_backend::__parallel_transform_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) 
mutable { + return __binary_op2(*__i, *(__first2 + (__i - __first1))); + }, + __init, + __binary_op1, // Combine + [__first1, __first2, __binary_op1, __binary_op2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, + _Tp __init) -> _Tp { + return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, + __binary_op2, _IsVector{}); + }); + }); +} + //------------------------------------------------------------------------ // transform_reduce (version with unary and binary functions) //------------------------------------------------------------------------ @@ -133,6 +169,18 @@ __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _Forwar return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); } +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, + typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> @@ -150,6 +198,23 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi }); } +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) +{ + return __internal::__except_handler([&]() { + return __par_backend::__parallel_transform_reduce( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, + [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) 
{ + return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{}); + }); + }); +} + //------------------------------------------------------------------------ // transform_exclusive_scan // From a32df2280222e8af0be85fba0ff9350a1bf59125 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 14:50:12 +0100 Subject: [PATCH 160/566] __pattern_transform_reduce + tag calls --- include/oneapi/dpl/pstl/glue_numeric_impl.h | 28 ++++++++++++--------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index 9c450f79a2b..82d5fe9e8a9 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -70,11 +70,13 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward _ForwardIterator2 __first2, _Tp __init) { typedef typename ::std::iterator_traits<_ForwardIterator1>::value_type _InputType; + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, ::std::plus<_InputType>(), - ::std::multiplies<_InputType>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, + ::std::plus<_InputType>(), ::std::multiplies<_InputType>()); } template transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { - return 
oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __init, __binary_op1, + __binary_op2); } template @@ -94,10 +98,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + + return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __init, __binary_op, __unary_op); } // [exclusive.scan] From db44b15df58833e9c246168026a5d3925689572e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 15:49:59 +0100 Subject: [PATCH 161/566] __pattern_transform_scan + tag impls --- .../dpl/pstl/hetero/numeric_impl_hetero.h | 30 ++++++ include/oneapi/dpl/pstl/numeric_fwd.h | 24 +++++ include/oneapi/dpl/pstl/numeric_impl.h 
| 102 ++++++++++++++++++ 3 files changed, 156 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index e581a66546c..cdaf9c8a606 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -252,6 +252,21 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } +template +_Iterator2 +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, + _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) +{ + using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; + using _InitType = unseq_backend::__init_value<_RepackedType>; + + return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); +} + // scan without initial element template @@ -268,6 +283,21 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato __unary_op, _InitType{}, __binary_op, _Inclusive{}); } +template +_Iterator2 +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _BinaryOperation __binary_op, _Inclusive) +{ + using _Type = typename ::std::iterator_traits<_Iterator1>::value_type; + using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; + using _InitType = unseq_backend::__no_init_value<_RepackedType>; + + return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __unary_op, _InitType{}, __binary_op, _Inclusive{}); +} + //------------------------------------------------------------------------ // adjacent_difference 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index e060327043d..28e95d0175c 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -142,6 +142,12 @@ __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryOperation, _Inclusive, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, + _Tp, _BinaryOperation, _Inclusive) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -150,6 +156,12 @@ __pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAcces _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, /*is_parallel=*/::std::true_type); +template +::std::enable_if_t, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, @@ -158,6 +170,12 @@ __pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAcces _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, /*is_parallel=*/::std::true_type); +template +::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); + // transform_scan without initial element template @@ -166,6 +184,12 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _F _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation 
__binary_op, _Inclusive, _IsVector __is_vector, _IsParallel __is_parallel); +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryOperation, _BinaryOperation, _Inclusive); + //------------------------------------------------------------------------ // adjacent_difference //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 8253106a7c5..28fcec58ab3 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -305,6 +305,20 @@ __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardI .first; } +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, + _Inclusive) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), + typename _Tag::__is_vector{}) + .first; +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< @@ -336,6 +350,36 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs }); } +template +::std::enable_if_t, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + + return __internal::__except_handler([&]() { + __par_backend::__parallel_transform_scan( + ::std::forward<_ExecutionPolicy>(__exec), __last - __first, + 
[__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, + __binary_op, + [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { + // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. + return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, + __unary_op, + /*__is_vector*/ ::std::false_type()); + }, + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, + _Tp __init) { + return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, + __init, __binary_op, _Inclusive(), _IsVector{}) + .second; + }); + return __result + (__last - __first); + }); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, @@ -373,6 +417,42 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs }); } +template +::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + _DifferenceType __n = __last - __first; + + if (__n <= 0) + { + return __result; + } + return __internal::__except_handler([&]() { + __par_backend::__parallel_strict_scan( + ::std::forward<_ExecutionPolicy>(__exec), __n, __init, + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len) { + return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, + __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{}) + .second; + }, 
+ __binary_op, + [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) { + return *(::std::transform(__result + __i, __result + __i + __len, __result + __i, + [&__initial, &__binary_op](const _Tp& __x) { + _ONEDPL_PRAGMA_FORCEINLINE + return __binary_op(__initial, __x); + }) - + 1); + }, + [](_Tp) {}); + return __result + (__last - __first); + }); +} + // transform_scan without initial element template @@ -395,6 +475,28 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _F } } +template +_OutputIterator +__pattern_transform_scan(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) +{ + static_assert(__is_backend_tag_v<_Tag>); + + typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; + if (__first != __last) + { + _ValueType __tmp = __unary_op(*__first); + *__result = __tmp; + return __pattern_transform_scan(::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, + __unary_op, __tmp, __binary_op, _Inclusive(), __is_vector, __is_parallel); + } + else + { + return __result; + } +} + //------------------------------------------------------------------------ // adjacent_difference //------------------------------------------------------------------------ From 8af57fb21dead97d159bac7c971cac88ca0d2a75 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 15:15:49 +0100 Subject: [PATCH 162/566] __pattern_transform_scan + tag calls --- include/oneapi/dpl/pstl/glue_numeric_impl.h | 33 +++++++++++---------- include/oneapi/dpl/pstl/numeric_impl.h | 14 ++++++--- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index 82d5fe9e8a9..91c8b7a7c7b 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ 
b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -225,11 +225,12 @@ transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, - /*inclusive=*/::std::false_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __init, __binary_op, + /*inclusive=*/::std::false_type()); } // [transform.inclusive.scan] @@ -241,11 +242,12 @@ transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Tp __init) { - return oneapi::dpl::__internal::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, - /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, 
__unary_op, __init, __binary_op, + /*inclusive=*/::std::true_type()); } template (__exec), __first, __last, __result, __unary_op, __binary_op, - /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __binary_op, + /*inclusive=*/::std::true_type()); } // [adjacent.difference] diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 28fcec58ab3..683531a9844 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -466,8 +466,13 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _F { _ValueType __tmp = __unary_op(*__first); *__result = __tmp; - return __pattern_transform_scan(::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, - __unary_op, __tmp, __binary_op, _Inclusive(), __is_vector, __is_parallel); + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator, _OutputIterator>(); + + return __pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, + ++__result, + __unary_op, __tmp, __binary_op, _Inclusive()); } else { @@ -488,8 +493,9 @@ __pattern_transform_scan(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator { _ValueType __tmp = __unary_op(*__first); *__result = __tmp; - return __pattern_transform_scan(::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, - __unary_op, __tmp, __binary_op, _Inclusive(), 
__is_vector, __is_parallel); + + return __pattern_transform_scan(__tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, + __unary_op, __tmp, __binary_op, _Inclusive()); } else { From 57de95f3b04de03297b9c742d98a681547e21289 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 15:34:46 +0100 Subject: [PATCH 163/566] __pattern_adjacent_difference + tag impls --- .../dpl/pstl/hetero/numeric_impl_hetero.h | 55 +++++++++++++++++++ include/oneapi/dpl/pstl/numeric_fwd.h | 11 ++++ include/oneapi/dpl/pstl/numeric_impl.h | 34 ++++++++++++ 3 files changed, 100 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index cdaf9c8a606..a051fccfd28 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -365,6 +365,61 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir } } +template +_ForwardIterator2 +__pattern_adjacent_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first, + _ForwardIterator1 __last, _ForwardIterator2 __d_first, _BinaryOperation __op) +{ + auto __n = __last - __first; + if (__n <= 0) + return __d_first; + + using _It1ValueT = typename ::std::iterator_traits<_ForwardIterator1>::value_type; + using _It2ValueTRef = typename ::std::iterator_traits<_ForwardIterator2>::reference; + + _ForwardIterator2 __d_last = __d_first + __n; + +#if !__SYCL_UNNAMED_LAMBDA__ + // if we have the only element, just copy it according to the specification + if (__n == 1) + { + return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op, __tag]() { + auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( + ::std::forward<_ExecutionPolicy>(__exec)); + + __internal::__pattern_walk2_brick(__tag, __wrapped_policy, __first, __last, __d_first, + __internal::__brick_copy{}); + + return 
__d_last; + }); + } + else +#endif + { + return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op, __n]() { + auto __fn = [__op](_It1ValueT __in1, _It1ValueT __in2, _It2ValueTRef __out1) { + __out1 = __op(__in2, __in1); // This move assignment is allowed by the C++ standard draft N4810 + }; + + auto __keep1 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator2>(); + auto __buf2 = __keep2(__d_first, __d_last); + + using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; + + oneapi::dpl::__par_backend_hetero::__parallel_for(__exec, _Function{__fn}, __n, __buf1.all_view(), + __buf2.all_view()) + .wait(); + + return __d_last; + }); + } +} + } // namespace __internal } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index 28e95d0175c..fdd4928c230 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -209,12 +209,23 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ou __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryOperation, _IsVector, /*is_parallel*/ ::std::false_type) noexcept; +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryOperation) noexcept; + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> __pattern_adjacent_difference(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _BinaryOperation, _IsVector, /*is_parallel*/ ::std::true_type); +template +_RandomAccessIterator2 
+__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _BinaryOperation); + } // namespace __internal } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 683531a9844..39d9e16242e 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -543,6 +543,16 @@ __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _For return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); } +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __d_first, _BinaryOperation __op) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{}); +} + template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> @@ -567,6 +577,30 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 return __d_first + (__last - __first); } +template +_RandomAccessIterator2 +__pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, + _RandomAccessIterator2 __d_first, _BinaryOperation __op) +{ + assert(__first != __last); + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; + typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; + + *__d_first = *__first; + __par_backend::__parallel_for( + ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + _RandomAccessIterator2 __d_b = __d_first 
+ (__b - __first); + __internal::__brick_walk3( + __b, __e, __b + 1, __d_b + 1, + [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, + _IsVector{}); + }); + return __d_first + (__last - __first); +} + } // namespace __internal } // namespace dpl } // namespace oneapi From 9894ddb7dd18ae01efd271c1826a08e2bbfbe815 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 7 Feb 2024 15:33:58 +0100 Subject: [PATCH 164/566] __pattern_adjacent_difference + tag calls --- include/oneapi/dpl/pstl/glue_numeric_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index 91c8b7a7c7b..d6a98e6529f 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -271,14 +271,14 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first, _BinaryOperation __op) { - if (__first == __last) return __d_first; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + return oneapi::dpl::__internal::__pattern_adjacent_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op); } template From a795d1013a3e0aaa67ef67fbc95e563c51791c41 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 10:54:48 +0100 Subject: [PATCH 165/566] __pattern_histogram + tag impls --- 
.../oneapi/dpl/pstl/hetero/histogram_impl_hetero.h | 11 ++++++----- include/oneapi/dpl/pstl/histogram_impl.h | 8 +++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h index 27179622b6b..fbdeb161c29 100644 --- a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h @@ -117,11 +117,12 @@ struct __hist_fill_zeros_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_histogram(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _Size __num_bins, _BinHash&& __func, _RandomAccessIterator2 __histogram_first) +template +void +__pattern_histogram(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _Size __num_bins, _BinHash&& __func, + _RandomAccessIterator2 __histogram_first) { //If there are no histogram bins there is nothing to do if (__num_bins > 0) diff --git a/include/oneapi/dpl/pstl/histogram_impl.h b/include/oneapi/dpl/pstl/histogram_impl.h index 4c8f5204793..04893469ce5 100644 --- a/include/oneapi/dpl/pstl/histogram_impl.h +++ b/include/oneapi/dpl/pstl/histogram_impl.h @@ -32,12 +32,14 @@ namespace dpl namespace __internal { -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, +void +__pattern_histogram(_Tag, _ExecutionPolicy&& exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _Size __num_bins, _IdxHashFunc __func, _RandomAccessIterator2 __histogram_first) { + static_assert(__is_backend_tag_v<_Tag>); + static_assert(sizeof(_Size) == 0 /*false*/, "Histogram API is currently unsupported for policies other than device execution policies"); } From 
f4dd2947461e6fe41e1078242b3443d815102796 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 10:55:04 +0100 Subject: [PATCH 166/566] __pattern_histogram + tag calls --- include/oneapi/dpl/pstl/histogram_impl.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/histogram_impl.h b/include/oneapi/dpl/pstl/histogram_impl.h index 04893469ce5..a67455df13e 100644 --- a/include/oneapi/dpl/pstl/histogram_impl.h +++ b/include/oneapi/dpl/pstl/histogram_impl.h @@ -52,7 +52,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomA histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 first, _RandomAccessIterator1 last, _Size num_bins, _ValueType first_bin_min_val, _ValueType last_bin_max_val, _RandomAccessIterator2 histogram_first) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + oneapi::dpl::__internal::__pattern_histogram( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, oneapi::dpl::__internal::__evenly_divided_binhash<_ValueType>(first_bin_min_val, last_bin_max_val, num_bins), histogram_first); @@ -66,9 +70,13 @@ histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 first, _RandomAccessIt _RandomAccessIterator2 boundary_first, _RandomAccessIterator2 boundary_last, _RandomAccessIterator3 histogram_first) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3>(); + ::std::ptrdiff_t num_bins = boundary_last - boundary_first - 1; oneapi::dpl::__internal::__pattern_histogram( - ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, oneapi::dpl::__internal::__custom_boundary_binhash{boundary_first, boundary_last}, histogram_first); return 
histogram_first + num_bins; } From 89fe1ed5b553e79c247dab6ed37e05f9fdfb3a0b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 12:57:22 +0100 Subject: [PATCH 167/566] __pattern_for_loop_n + tag impls --- .../experimental/internal/for_loop_impl.h | 142 +++++++++--------- 1 file changed, 69 insertions(+), 73 deletions(-) diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index 9f4314f20ee..f323c14a225 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -162,37 +162,63 @@ class __reduction_pack }; // Sequenced version of for_loop_n -template +template void -__pattern_for_loop_n(_ExecutionPolicy&&, _Ip __first, _Size __n, _Function __f, __single_stride_type, - /*vector=*/::std::false_type, /*parallel=*/::std::false_type, _Rest&&... __rest) noexcept +__pattern_for_loop_n(_Tag, _ExecutionPolicy&&, _Ip __first, _Size __n, _Function __f, __single_stride_type, + _Rest&&... __rest) noexcept { + static_assert(__is_backend_tag_serial_v<_Tag>); + __reduction_pack<_Rest...> __pack{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; - for (_Size __i = 0; __i < __n; ++__i, ++__first) - __pack.__apply_func(__f, __first, __i); + if constexpr (!typename _Tag::__is_vector{}) + { + for (_Size __i = 0; __i < __n; ++__i, ++__first) + __pack.__apply_func(__f, __first, __i); + } + else + { + oneapi::dpl::__internal::__brick_walk1( + __n, [&__pack, __first, __f](_Size __idx) { __pack.__apply_func(__f, __first + __idx, __idx); }, + ::std::true_type{}); + } __pack.__finalize(__n); } -template +template void -__pattern_for_loop_n(_ExecutionPolicy&&, _Ip __first, _Size __n, _Function __f, _Sp __stride, - /*vector=*/::std::false_type, /*parallel=*/::std::false_type, _Rest&&... 
__rest) noexcept +__pattern_for_loop_n(_Tag, _ExecutionPolicy&&, _Ip __first, _Size __n, _Function __f, _Sp __stride, + _Rest&&... __rest) noexcept { + static_assert(__is_backend_tag_serial_v<_Tag>); + __reduction_pack<_Rest...> __pack{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; - // Simple loop from 0 to __n is not suitable here as we need to ensure that __first is always - // <= than the end iterator, even if it's not dereferenced. Some implementation might place - // validation checks to enforce this invariant. - if (__n > 0) + if constexpr (!typename _Tag::__is_vector{}) { - for (_Size __i = 0; __i < __n - 1; ++__i, oneapi::dpl::__internal::__advance(__first, __stride)) + // Simple loop from 0 to __n is not suitable here as we need to ensure that __first is always + // <= than the end iterator, even if it's not dereferenced. Some implementation might place + // validation checks to enforce this invariant. + if (__n > 0) { - __pack.__apply_func(__f, __first, __i); - } + for (_Size __i = 0; __i < __n - 1; ++__i, oneapi::dpl::__internal::__advance(__first, __stride)) + { + __pack.__apply_func(__f, __first, __i); + } - __pack.__apply_func(__f, __first, __n - 1); + __pack.__apply_func(__f, __first, __n - 1); + } + } + else + { + oneapi::dpl::__internal::__brick_walk1( + __n, + [&__pack, __first, __f, __stride](_Size __idx) { + __pack.__apply_func(__f, __first + __idx * __stride, __idx); + }, + ::std::true_type{}); } __pack.__finalize(__n); @@ -338,36 +364,6 @@ __pattern_for_loop(_ExecutionPolicy&&, _Ip __first, _Ip __last, _Function __f, _ __pack.__finalize(__ordinal_position); } -// Vectorized version of for_loop_n -template -void -__pattern_for_loop_n(_ExecutionPolicy&&, _Ip __first, _Size __n, _Function __f, __single_stride_type, - /*vector=*/::std::true_type, /*parallel=*/::std::false_type, _Rest&&... 
__rest) noexcept -{ - __reduction_pack<_Rest...> __pack{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; - - oneapi::dpl::__internal::__brick_walk1( - __n, [&__pack, __first, __f](_Size __idx) { __pack.__apply_func(__f, __first + __idx, __idx); }, - ::std::true_type{}); - - __pack.__finalize(__n); -} - -template -void -__pattern_for_loop_n(_ExecutionPolicy&&, _Ip __first, _Size __n, _Function __f, _Sp __stride, - /*vector=*/::std::true_type, /*parallel=*/::std::false_type, _Rest&&... __rest) noexcept -{ - __reduction_pack<_Rest...> __pack{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; - - oneapi::dpl::__internal::__brick_walk1( - __n, - [&__pack, __first, __f, __stride](_Size __idx) { __pack.__apply_func(__f, __first + __idx * __stride, __idx); }, - ::std::true_type{}); - - __pack.__finalize(__n); -} - // Vectorized version of for_loop template void @@ -386,11 +382,11 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function // perhaps it's better to check for presence of reduction object and call parallel_for routine instead. // TODO: need to add a static_assert for match between rest and f's arguments, currently there is a lot // of unclear error in cast of mismatch. -template void -__pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, __single_stride_type, - _IsVector __is_vector, /*parallel=*/::std::true_type, _Rest&&... __rest) +__pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, + __single_stride_type, _Rest&&... 
__rest) { using __pack_type = __reduction_pack<_Rest...>; @@ -398,34 +394,34 @@ __pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Functio const __pack_type __identity{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; oneapi::dpl::__internal::__except_handler([&]() { - return __par_backend::__parallel_reduce(::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, - [__is_vector, __first, __f](_Size __i, _Size __j, __pack_type __value) { - const auto __subseq_start = __first + __i; - const auto __length = __j - __i; - - oneapi::dpl::__internal::__brick_walk1( - __length, - [&__value, __f, __i, __subseq_start](_Size __idx) { - __value.__apply_func(__f, __subseq_start + __idx, - __i + __idx); - }, - __is_vector); - - return __value; - }, - [](__pack_type __lhs, const __pack_type& __rhs) { - __lhs.__combine(__rhs); - return __lhs; - }) + return __par_backend::__parallel_reduce( + ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, + [__first, __f](_Size __i, _Size __j, __pack_type __value) { + const auto __subseq_start = __first + __i; + const auto __length = __j - __i; + + oneapi::dpl::__internal::__brick_walk1( + __length, + [&__value, __f, __i, __subseq_start](_Size __idx) { + __value.__apply_func(__f, __subseq_start + __idx, __i + __idx); + }, + _IsVector{}); + + return __value; + }, + [](__pack_type __lhs, const __pack_type& __rhs) { + __lhs.__combine(__rhs); + return __lhs; + }) .__finalize(__n); }); } -template void -__pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, _Sp __stride, - _IsVector __is_vector, /*parallel=*/::std::true_type, _Rest&&... __rest) +__pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, + _Sp __stride, _Rest&&... 
__rest) { using __pack_type = __reduction_pack<_Rest...>; @@ -435,7 +431,7 @@ __pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Functio oneapi::dpl::__internal::__except_handler([&]() { return __par_backend::__parallel_reduce( ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, - [__is_vector, __first, __f, __stride](_Size __i, _Size __j, __pack_type __value) { + [__first, __f, __stride](_Size __i, _Size __j, __pack_type __value) { const auto __subseq_start = __first + __i * __stride; const auto __length = __j - __i; @@ -444,7 +440,7 @@ __pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Functio [&__value, __f, __i, __subseq_start, __stride](_Size __idx) { __value.__apply_func(__f, __subseq_start + __idx * __stride, __i + __idx); }, - __is_vector); + _IsVector{}); return __value; }, From 584bae221ebb68513333af202f6489710bf0f55a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 12:57:54 +0100 Subject: [PATCH 168/566] __pattern_for_loop_n + tag calls --- .../experimental/internal/for_loop_impl.h | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index f323c14a225..69e07159b1d 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -252,9 +252,9 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function /*vector=*/::std::false_type, /*parallel=*/::std::false_type, _Rest&&... 
__rest) noexcept { oneapi::dpl::__internal::__pattern_for_loop_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, + __serial_tag<::std::false_type>{}, ::std::forward<_ExecutionPolicy>(__exec), __first, oneapi::dpl::__internal::__calculate_input_sequence_length(__first, __last, __stride), __f, __stride, - ::std::false_type{}, ::std::false_type{}, ::std::forward<_Rest>(__rest)...); + ::std::forward<_Rest>(__rest)...); } template @@ -371,9 +371,9 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function /*vector=*/::std::true_type, /*parallel=*/::std::false_type, _Rest&&... __rest) noexcept { oneapi::dpl::__internal::__pattern_for_loop_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, + __serial_tag<::std::true_type>{}, ::std::forward<_ExecutionPolicy>(__exec), __first, oneapi::dpl::__internal::__calculate_input_sequence_length(__first, __last, __stride), __f, __stride, - ::std::true_type{}, ::std::false_type{}, ::std::forward<_Rest>(__rest)...); + ::std::forward<_Rest>(__rest)...); } // Parallel version of for_loop_n @@ -460,9 +460,9 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function /*parallel=*/::std::true_type, _Rest&&... 
__rest) { oneapi::dpl::__internal::__pattern_for_loop_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, + __parallel_tag<_IsVector>{}, ::std::forward<_ExecutionPolicy>(__exec), __first, oneapi::dpl::__internal::__calculate_input_sequence_length(__first, __last, __stride), __f, __stride, - __is_vector, ::std::true_type{}, ::std::forward<_Rest>(__rest)...); + ::std::forward<_Rest>(__rest)...); } // Helper structure to split code functions for integral and iterator types so the return @@ -508,7 +508,7 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> // Special versions for for_loop: handles both iterators and integral types(treated as random access iterators) template -auto +constexpr auto __use_vectorization() -> decltype(__use_par_vec_helper<_Ip>::template __use_vector<_ExecutionPolicy>()) { @@ -516,7 +516,7 @@ __use_vectorization() } template -auto +constexpr auto __use_parallelization() -> decltype(__use_par_vec_helper<_Ip>::template __use_parallel<_ExecutionPolicy>()) { @@ -542,11 +542,20 @@ void __for_loop_n_impl(_ExecutionPolicy&& __exec, _Ip __start, _Size __n, _Fp&& __f, _Sp __stride, ::std::tuple<_Rest...>&& __t, ::std::index_sequence<_Is...>) { - oneapi::dpl::__internal::__pattern_for_loop_n( - ::std::forward<_ExecutionPolicy>(__exec), __start, __n, __f, __stride, - oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(), - oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(), - ::std::get<_Is>(::std::move(__t))...); + using _IsVector = decltype(oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>()); + + if constexpr (oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>()) + { + oneapi::dpl::__internal::__pattern_for_loop_n(__parallel_tag<_IsVector>{}, + ::std::forward<_ExecutionPolicy>(__exec), __start, __n, __f, + __stride, ::std::get<_Is>(::std::move(__t))...); + } + else + { + oneapi::dpl::__internal::__pattern_for_loop_n(__serial_tag<_IsVector>{}, + 
::std::forward<_ExecutionPolicy>(__exec), __start, __n, __f, + __stride, ::std::get<_Is>(::std::move(__t))...); + } } template From c0ccfca9739501430a8c493414f8a23d477eb86e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 13:28:23 +0100 Subject: [PATCH 169/566] __pattern_for_loop + tag impls --- .../experimental/internal/for_loop_impl.h | 114 +++++++++--------- 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index 69e07159b1d..2f5b511acb4 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -242,19 +242,61 @@ struct __is_random_access_or_integral<_Ip, { }; +template +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::bidirectional_iterator_tag>, + _IndexType> +__execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept; + +template +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::forward_iterator_tag> || + ::std::is_same_v::iterator_category, ::std::input_iterator_tag>, + _IndexType> +__execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept; + template inline constexpr bool __is_random_access_or_integral_v = __is_random_access_or_integral<_Ip>::value; // Sequenced version of for_loop for RAI and integral types -template -::std::enable_if_t<__is_random_access_or_integral_v<_Ip>> -__pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function __f, _Sp __stride, - /*vector=*/::std::false_type, /*parallel=*/::std::false_type, _Rest&&... __rest) noexcept +// Vectorized version of for_loop +template +void +__pattern_for_loop(_Tag __tag, _ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function __f, _Sp __stride, _Rest&&... 
__rest) noexcept { - oneapi::dpl::__internal::__pattern_for_loop_n( - __serial_tag<::std::false_type>{}, ::std::forward<_ExecutionPolicy>(__exec), __first, - oneapi::dpl::__internal::__calculate_input_sequence_length(__first, __last, __stride), __f, __stride, - ::std::forward<_Rest>(__rest)...); + static_assert(__is_backend_tag_serial_v<_Tag>); + + if constexpr (__is_random_access_or_integral_v<_Ip>) + { + oneapi::dpl::__internal::__pattern_for_loop_n( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + oneapi::dpl::__internal::__calculate_input_sequence_length(__first, __last, __stride), __f, __stride, + ::std::forward<_Rest>(__rest)...); + } + else + { + __reduction_pack<_Rest...> __pack{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; + + // Make sure that our index type is able to hold all the possible values + using __index_type = typename __difference<_Ip>::__type; + __index_type __ordinal_position = 0; + + if (__stride == 1) + { + // Avoid check for i % stride on each iteration for the most common case. + for (; __first != __last; ++__first, ++__ordinal_position) + __pack.__apply_func(__f, __first, __ordinal_position); + } + else + { + __ordinal_position = + oneapi::dpl::__internal::__execute_loop_strided(__first, __last, __f, __stride, __pack, + // Only passed to deduce the type for internal counter + __index_type{}); + } + + __pack.__finalize(__ordinal_position); + } } template @@ -318,11 +360,12 @@ __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pa } // Sequenced version of for_loop for non-RAI and non-integral types -template +template ::std::enable_if_t> -__pattern_for_loop(_ExecutionPolicy&&, _Ip __first, _Ip __last, _Function __f, __single_stride_type, - /*vector=*/::std::false_type, /*parallel=*/::std::false_type, _Rest&&... __rest) noexcept +__pattern_for_loop(_Tag, _ExecutionPolicy&&, _Ip __first, _Ip __last, _Function __f, __single_stride_type, _Rest&&... 
__rest) noexcept { + static_assert(__is_backend_tag_serial_v<_Tag>); + __reduction_pack<_Rest...> __pack{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; // Make sure that our index type is able to hold all the possible values @@ -336,46 +379,6 @@ __pattern_for_loop(_ExecutionPolicy&&, _Ip __first, _Ip __last, _Function __f, _ __pack.__finalize(__ordinal_position); } -template -::std::enable_if_t> -__pattern_for_loop(_ExecutionPolicy&&, _Ip __first, _Ip __last, _Function __f, _Sp __stride, - /*vector=*/::std::false_type, /*parallel=*/::std::false_type, _Rest&&... __rest) noexcept -{ - __reduction_pack<_Rest...> __pack{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; - - // Make sure that our index type is able to hold all the possible values - using __index_type = typename __difference<_Ip>::__type; - __index_type __ordinal_position = 0; - - if (__stride == 1) - { - // Avoid check for i % stride on each iteration for the most common case. - for (; __first != __last; ++__first, ++__ordinal_position) - __pack.__apply_func(__f, __first, __ordinal_position); - } - else - { - __ordinal_position = - oneapi::dpl::__internal::__execute_loop_strided(__first, __last, __f, __stride, __pack, - // Only passed to deduce the type for internal counter - __index_type{}); - } - - __pack.__finalize(__ordinal_position); -} - -// Vectorized version of for_loop -template -void -__pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function __f, _Sp __stride, - /*vector=*/::std::true_type, /*parallel=*/::std::false_type, _Rest&&... 
__rest) noexcept -{ - oneapi::dpl::__internal::__pattern_for_loop_n( - __serial_tag<::std::true_type>{}, ::std::forward<_ExecutionPolicy>(__exec), __first, - oneapi::dpl::__internal::__calculate_input_sequence_length(__first, __last, __stride), __f, __stride, - ::std::forward<_Rest>(__rest)...); -} - // Parallel version of for_loop_n // TODO: Using parallel_reduce when we don't have a reduction object in the pack might be ineffective, @@ -452,15 +455,14 @@ __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, }); } -template void -__pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function __f, _Sp __stride, - _IsVector __is_vector, - /*parallel=*/::std::true_type, _Rest&&... __rest) +__pattern_for_loop(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function __f, + _Sp __stride, _Rest&&... __rest) { oneapi::dpl::__internal::__pattern_for_loop_n( - __parallel_tag<_IsVector>{}, ::std::forward<_ExecutionPolicy>(__exec), __first, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, oneapi::dpl::__internal::__calculate_input_sequence_length(__first, __last, __stride), __f, __stride, ::std::forward<_Rest>(__rest)...); } From 31dc09e2e67843d2276d3230128ffb09f195a6bf Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 8 Feb 2024 13:28:39 +0100 Subject: [PATCH 170/566] __pattern_for_loop + tag calls --- .../experimental/internal/for_loop_impl.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index 2f5b511acb4..170cc0f0d62 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -531,11 +531,20 @@ void __for_loop_impl(_ExecutionPolicy&& __exec, _Ip __start, _Ip __finish, _Fp&& __f, _Sp __stride, ::std::tuple<_Rest...>&& __t, 
::std::index_sequence<_Is...>) { - oneapi::dpl::__internal::__pattern_for_loop( - ::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __f, __stride, - oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>(), - oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>(), - ::std::get<_Is>(::std::move(__t))...); + using _IsVector = decltype(oneapi::dpl::__internal::__use_vectorization<_ExecutionPolicy, _Ip>()); + + if constexpr (oneapi::dpl::__internal::__use_parallelization<_ExecutionPolicy, _Ip>()) + { + oneapi::dpl::__internal::__pattern_for_loop(__parallel_tag<_IsVector>{}, + ::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __f, + __stride, ::std::get<_Is>(::std::move(__t))...); + } + else + { + oneapi::dpl::__internal::__pattern_for_loop(__serial_tag<_IsVector>{}, + ::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __f, + __stride, ::std::get<_Is>(::std::move(__t))...); + } } template Date: Fri, 9 Feb 2024 14:09:47 +0100 Subject: [PATCH 171/566] __pattern_scan_copy + tag impls --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 4e72e3af667..88585811c6b 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1567,6 +1567,33 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __ return ::std::make_pair(__output_first + __n, __num_copied); } +template +::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type> +__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +{ + using _It1DifferenceType = typename 
::std::iterator_traits<_Iterator1>::difference_type; + + if (__first == __last) + return ::std::make_pair(__output_first, _It1DifferenceType{0}); + + _It1DifferenceType __n = __last - __first; + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); + auto __buf2 = __keep2(__output_first, __output_first + __n); + + auto __res = + __par_backend_hetero::__parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), + __buf2.all_view(), __n, __create_mask_op, __copy_by_mask_op); + + ::std::size_t __num_copied = __res.get(); + return ::std::make_pair(__output_first + __n, __num_copied); +} + template oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result_first, From 47e30fc9c11425ba8e6d852ee110a235193d30b3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 14:10:18 +0100 Subject: [PATCH 172/566] __pattern_scan_copy + tag calls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 88585811c6b..0c5cd09cb4d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1689,7 +1689,7 @@ __pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}}; auto __result = __pattern_scan_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, 
::std::forward<_ExecutionPolicy>(__exec), __first, __last, __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)), @@ -1733,8 +1733,8 @@ __pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ __not_pred<_BinaryPredicate>{__pred}}; - auto __result = __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first, - __create_mask_op, __copy_by_mask_op); + auto __result = __pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result_first, __create_mask_op, __copy_by_mask_op); return __result_first + __result.second; } From d1ca14fa0cf248ace53cda64b7ad87bb1f8e2ccd Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 14:10:40 +0100 Subject: [PATCH 173/566] __pattern_hetero_set_op + tag impls --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 0c5cd09cb4d..0091ef00e46 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3111,6 +3111,71 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1 __result, __comp, unseq_backend::_IntersectionTag()); } +template +_OutputIterator +__pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp, _IsOpDifference) +{ + typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _Size1; + typedef typename 
::std::iterator_traits<_ForwardIterator2>::difference_type _Size2; + + const _Size1 __n1 = __last1 - __first1; + const _Size2 __n2 = __last2 - __first2; + + //Algo is based on the recommended approach of set_intersection algo for GPU: binary search + scan (copying by mask). + using _ReduceOp = ::std::plus<_Size1>; + using _Assigner = unseq_backend::__scan_assigner; + using _NoAssign = unseq_backend::__scan_no_assign; + using _MaskAssigner = unseq_backend::__mask_assigner<2>; + using _InitType = unseq_backend::__no_init_value<_Size1>; + using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; + + _ReduceOp __reduce_op; + _Assigner __assign_op; + _DataAcc __get_data_op; + unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 2> + __copy_by_mask_op; + unseq_backend::__brick_set_op<_ExecutionPolicy, _Compare, _Size1, _Size2, _IsOpDifference> __create_mask_op{ + __comp, __n1, __n2}; + + // temporary buffer to store boolean mask + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n1); + + auto __keep1 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); + auto __buf1 = __keep1(__first1, __last1); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator2>(); + auto __buf2 = __keep2(__first2, __last2); + + auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _OutputIterator>(); + auto __buf3 = __keep3(__result, __result + __n1); + + auto __result_size = + __par_backend_hetero::__parallel_transform_scan_base( + ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__ranges::make_zip_view( + __buf1.all_view(), __buf2.all_view(), + oneapi::dpl::__ranges::all_view( + __mask_buf.get_buffer())), + __buf3.all_view(), __reduce_op, _InitType{}, + // local scan + unseq_backend::__scan{ + 
__reduce_op, __get_data_op, __assign_op, _MaskAssigner{}, __create_mask_op}, + // scan between groups + unseq_backend::__scan{__reduce_op, __get_data_op, _NoAssign{}, __assign_op, + __get_data_op}, + // global scan + __copy_by_mask_op) + .get(); + + return __result + __result_size; +} + template _OutputIterator From ea0f7ec8df149341ae2539c7585eb2011e4c3e55 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 14:10:53 +0100 Subject: [PATCH 174/566] __pattern_hetero_set_op + tag calls --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 0091ef00e46..52b89025106 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3187,8 +3187,8 @@ __pattern_set_intersection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ if (__first1 == __last1 || __first2 == __last2) return __result; - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_IntersectionTag()); + return __pattern_hetero_set_op(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + __last2, __result, __comp, unseq_backend::_IntersectionTag()); } //Dummy names to avoid kernel problems @@ -3246,8 +3246,8 @@ __pattern_set_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); } - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_DifferenceTag()); + return __pattern_hetero_set_op(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + __last2, __result, __comp, 
unseq_backend::_DifferenceTag()); } //Dummy names to avoid kernel problems @@ -3353,8 +3353,10 @@ __pattern_set_union(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, //1. Calc difference {2} \ {1} const auto __n_diff = - oneapi::dpl::__internal::__pattern_hetero_set_op(__exec, __first2, __last2, __first1, __last1, __buf, __comp, - unseq_backend::_DifferenceTag()) - + oneapi::dpl::__internal::__pattern_hetero_set_op( + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first2), decltype(__last2), + decltype(__first1), decltype(__last1), decltype(__buf)>(), + __exec, __first2, __last2, __first1, __last1, __buf, __comp, unseq_backend::_DifferenceTag()) - __buf; //2. Merge {1} and the difference @@ -3503,6 +3505,8 @@ __pattern_set_symmetric_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPo //1. Calc difference {1} \ {2} const auto __n_diff_1 = oneapi::dpl::__internal::__pattern_hetero_set_op( + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first1), decltype(__last1), + decltype(__first2), decltype(__last2), decltype(__buf_1)>(), oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), __first1, __last1, __first2, __last2, __buf_1, __comp, unseq_backend::_DifferenceTag()) - __buf_1; @@ -3510,6 +3514,8 @@ __pattern_set_symmetric_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPo //2. 
Calc difference {2} \ {1} const auto __n_diff_2 = oneapi::dpl::__internal::__pattern_hetero_set_op( + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first2), decltype(__last2), + decltype(__first1), decltype(__last1), decltype(__buf_2)>(), oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), __first2, __last2, __first1, __last1, __buf_2, __comp, unseq_backend::_DifferenceTag()) - __buf_2; From 669d73da1416b5458ba0f8271741bcb1445b34de Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 14:11:14 +0100 Subject: [PATCH 175/566] __pattern_transform_scan_base + tag impls --- .../dpl/pstl/hetero/numeric_impl_hetero.h | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index a051fccfd28..89e2782a0c5 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -252,6 +252,69 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } +template +_Iterator2 +__pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, + _BinaryOperation __binary_op, _Inclusive) +{ + if (__first == __last) + return __result; + + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + + const auto __n = __last - __first; + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf1 = __keep1(__first, __last); + + // This is a temporary workaround for an in-place exclusive scan while the SYCL backend scan pattern is not fixed. 
+ const bool __is_scan_inplace_exclusive = __n > 1 && !_Inclusive{} && __iterators_possibly_equal(__first, __result); + if (!__is_scan_inplace_exclusive) + { + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __buf2 = __keep2(__result, __result + __n); + + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, + __unary_op, __init, __binary_op, _Inclusive{}) + .wait(); + } + else + { + assert(__n > 1); + assert(!_Inclusive{}); + assert(__iterators_possibly_equal(__first, __result)); + + using _Type = typename _InitType::__value_type; + + auto __policy = + __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)); + using _NewExecutionPolicy = decltype(__policy); + + // Create temporary buffer + oneapi::dpl::__par_backend_hetero::__buffer<_NewExecutionPolicy, _Type> __tmp_buf(__policy, __n); + auto __first_tmp = __tmp_buf.get(); + auto __last_tmp = __first_tmp + __n; + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __buf2 = __keep2(__first_tmp, __last_tmp); + + // Run main algorithm and save data into temporary buffer + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__policy, __buf1.all_view(), __buf2.all_view(), + __n, __unary_op, __init, __binary_op, _Inclusive{}) + .wait(); + + // Move data from temporary buffer into results + oneapi::dpl::__internal::__pattern_walk2_brick(__dispatch_tag, ::std::move(__policy), __first_tmp, __last_tmp, + __result, + oneapi::dpl::__internal::__brick_move<_NewExecutionPolicy>{}); + + //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer + } + + return __result + __n; +} template From 6d81e7fa273ad7fb11b9c22da0e1f3541741078a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 14:11:30 +0100 Subject: [PATCH 176/566] __pattern_transform_scan_base + tag calls --- include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 89e2782a0c5..e1af0f7a73c 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -326,7 +326,7 @@ __pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } @@ -357,7 +357,7 @@ __pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); } From 062988d2894f360772661c1a6b0216ae9712045c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 9 Feb 2024 17:37:02 +0100 Subject: [PATCH 177/566] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - fix compile error: no matching function for call to 
'__parallel_find_or' --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index b437154f666..6038150b03b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1205,6 +1205,109 @@ __parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, return __result != __init_value ? __result : __rng_n; } +// Base pattern for __parallel_or and __parallel_find. The execution depends on tag type _BrickTag. +template +::std::conditional_t< + ::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, + oneapi::dpl::__internal::__difference_t::type>> +__parallel_find_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Brick __f, + _BrickTag __brick_tag, _Ranges&&... __rngs) +{ + using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; + using _AtomicType = typename _BrickTag::_AtomicType; + using _FindOrKernel = + oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_generator<__find_or_kernel, _CustomName, _Brick, + _BrickTag, _Ranges...>; + + constexpr bool __or_tag_check = ::std::is_same_v<_BrickTag, __parallel_or_tag>; + auto __rng_n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); + assert(__rng_n > 0); + + // TODO: find a way to generalize getting of reliable work-group size + auto __wgroup_size = oneapi::dpl::__internal::__max_work_group_size(__exec); +#if _ONEDPL_COMPILE_KERNEL + auto __kernel = __internal::__kernel_compiler<_FindOrKernel>::__compile(__exec); + __wgroup_size = ::std::min(__wgroup_size, oneapi::dpl::__internal::__kernel_work_group_size(__exec, __kernel)); +#endif + auto __max_cu = oneapi::dpl::__internal::__max_compute_units(__exec); + + auto __n_groups = (__rng_n - 1) / __wgroup_size + 1; + 
// TODO: try to change __n_groups with another formula for more perfect load balancing + __n_groups = ::std::min(__n_groups, decltype(__n_groups)(__max_cu)); + + auto __n_iter = (__rng_n - 1) / (__n_groups * __wgroup_size) + 1; + + _PRINT_INFO_IN_DEBUG_MODE(__exec, __wgroup_size, __max_cu); + + _AtomicType __init_value = _BrickTag::__init_value(__rng_n); + auto __result = __init_value; + + auto __pred = oneapi::dpl::__par_backend_hetero::__early_exit_find_or<_ExecutionPolicy, _Brick>{__f}; + + // scope is to copy data back to __result after destruction of temporary sycl:buffer + { + auto __temp = sycl::buffer<_AtomicType, 1>(&__result, 1); // temporary storage for global atomic + + // main parallel_for + __exec.queue().submit([&](sycl::handler& __cgh) { + oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); + auto __temp_acc = __temp.template get_access(__cgh); + + // create local accessor to connect atomic with + __dpl_sycl::__local_accessor<_AtomicType> __temp_local(1, __cgh); +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT + __cgh.use_kernel_bundle(__kernel.get_kernel_bundle()); +#endif + __cgh.parallel_for<_FindOrKernel>( +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT + __kernel, +#endif + sycl::nd_range(sycl::range(__n_groups * __wgroup_size), + sycl::range(__wgroup_size)), + [=](sycl::nd_item __item_id) { + auto __local_idx = __item_id.get_local_id(0); + + __dpl_sycl::__atomic_ref<_AtomicType, sycl::access::address_space::global_space> __found( + *__dpl_sycl::__get_accessor_ptr(__temp_acc)); + __dpl_sycl::__atomic_ref<_AtomicType, sycl::access::address_space::local_space> __found_local( + *__dpl_sycl::__get_accessor_ptr(__temp_local)); + + // 1. Set initial value to local atomic + if (__local_idx == 0) + __found_local.store(__init_value); + __dpl_sycl::__group_barrier(__item_id); + + // 2. 
Find any element that satisfies pred and set local atomic value to global atomic + constexpr auto __comp = typename _BrickTag::_Compare{}; + __pred(__item_id, __n_iter, __wgroup_size, __comp, __found_local, __brick_tag, __rngs...); + __dpl_sycl::__group_barrier(__item_id); + + // Set local atomic value to global atomic + if (__local_idx == 0 && __comp(__found_local.load(), __found.load())) + { + if constexpr (__or_tag_check) + __found.store(1); + else + { + for (auto __old = __found.load(); __comp(__found_local.load(), __old); + __old = __found.load()) + { + __found.compare_exchange_strong(__old, __found_local.load()); + } + } + } + }); + }); + //The end of the scope - a point of synchronization (on temporary sycl buffer destruction) + } + + if constexpr (__or_tag_check) + return __result; + else + return __result != __init_value ? __result : __rng_n; +} + + //------------------------------------------------------------------------ // parallel_or - sync pattern //------------------------------------------------------------------------ @@ -1301,6 +1404,7 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy& typename ::std::conditional<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>::type; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view()); From 112654a2033b5a04757833996d8ff3a7eebb54fd Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 10:51:40 +0100 Subject: [PATCH 178/566] Fix review comment: I guess a backend type should be depended on the environment and a logic of backend selection should be like that we have currently... 
--- include/oneapi/dpl/pstl/execution_impl.h | 23 +++++++++++++++++-- include/oneapi/dpl/pstl/omp/parallel_for.h | 2 +- .../oneapi/dpl/pstl/omp/parallel_for_each.h | 2 +- .../oneapi/dpl/pstl/parallel_backend_serial.h | 5 ++-- include/oneapi/dpl/pstl/parallel_impl.h | 10 +++++--- 5 files changed, 33 insertions(+), 9 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index ad29a81b0c5..33ada982ff7 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -19,6 +19,7 @@ #include #include +#include "parallel_backend.h" #include "execution_defs.h" #include "iterator_defs.h" @@ -96,10 +97,28 @@ __is_parallelization_preferred() // backend selector with tags //------------------------------------------------------------------------ +struct __serial_backend_tag +{ +}; + struct __tbb_backend_tag { }; +struct __omp_backend_tag +{ +}; + +#if _ONEDPL_PAR_BACKEND_TBB +using __par_backend_tag = __tbb_backend_tag; +#elif _ONEDPL_PAR_BACKEND_OPENMP +using __par_backend_tag = __omp_backend_tag; +#elif _ONEDPL_PAR_BACKEND_SERIAL +using __par_backend_tag = __serial_backend_tag; +#else +# error "Parallel backend was not specified" +#endif + template struct __serial_tag { @@ -112,7 +131,7 @@ struct __parallel_tag using __is_vector = _IsVector; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = __tbb_backend_tag; + using __backend_tag = __par_backend_tag; }; struct __parallel_forward_tag @@ -120,7 +139,7 @@ struct __parallel_forward_tag using __is_vector = ::std::false_type; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = __tbb_backend_tag; + using __backend_tag = __par_backend_tag; }; template diff --git a/include/oneapi/dpl/pstl/omp/parallel_for.h b/include/oneapi/dpl/pstl/omp/parallel_for.h index 296b24e282b..6f870dc4b4a 100644 --- 
a/include/oneapi/dpl/pstl/omp/parallel_for.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for.h @@ -68,7 +68,7 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) template void -__parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) { if (omp_in_parallel()) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_for_each.h b/include/oneapi/dpl/pstl/omp/parallel_for_each.h index 510cd04c352..9cc4f249f80 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for_each.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for_each.h @@ -63,7 +63,7 @@ __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterat template void -__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __first, +__parallel_for_each(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f) { if (omp_in_parallel()) diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 577fcbd36a0..f8bc92c5fc8 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -73,7 +73,8 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) template void -__parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + _Fp __f) { __f(__first, __last); } @@ -157,7 +158,7 @@ __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterat template void -__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, 
+__parallel_for_each(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) { for (auto __iter = __begin; __iter != __end; ++__iter) diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index 752db370260..d30fc053cde 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -66,10 +66,14 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick return __extremum != __initial_dist ? __first + __extremum : __last; } -template +template _Index -__parallel_find(__tbb_backend_tag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) +__parallel_find(_Tag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) { + static_assert(__is_backend_tag_v<_Tag>); + + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; _DifferenceType __initial_dist = _IsFirst::value ? __n : -1; @@ -78,7 +82,7 @@ __parallel_find(__tbb_backend_tag __tag, _ExecutionPolicy&& __exec, _Index __fir ::std::atomic<_DifferenceType> __extremum(__initial_dist); // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of // why using a shared variable scales fairly well in this situation. 
From bd5a343d59fd36ef7c7f408a1a9d690a8b2e4a4a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 11:25:37 +0100 Subject: [PATCH 179/566] __parallel_transform_reduce + tag impls --- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 11 ++ .../dpcpp/parallel_backend_sycl_reduce.h | 100 ++++++++++++++++++ .../dpl/pstl/omp/parallel_transform_reduce.h | 30 ++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 ++ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 12 +++ 5 files changed, 161 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index f2f07b70f64..85f4f0cf528 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -145,6 +145,17 @@ __parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _T __exec.__device_policy(), __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } +template +auto +__parallel_transform_reduce(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... 
__rngs) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_Tp, _Commutative>( + __exec.__device_policy(), __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); +} + //------------------------------------------------------------------------ // parallel_transform_scan //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 52b036ab4b3..6eab6ac03ec 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -511,6 +511,106 @@ __parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _T ::std::forward<_Ranges>(__rngs)...); } +template +auto +__parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) +{ + auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); + assert(__n > 0); + + // Get the work group size adjusted to the local memory limit. + // Pessimistically double the memory requirement to take into account memory used by compiled kernel. + // TODO: find a way to generalize getting of reliable work-group size. + ::std::size_t __work_group_size = oneapi::dpl::__internal::__slm_adjusted_work_group_size(__exec, sizeof(_Tp) * 2); + + // Use single work group implementation if array < __work_group_size * __iters_per_work_item. 
+ if (__work_group_size >= 256) + { + if (__n <= 256) + { + return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 512) + { + return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 1024) + { + return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 2048) + { + return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 4096) + { + return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 8192) + { + return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + + // Use two-step tree reduction. + // First step reduces __work_group_size * __iters_per_work_item_device_kernel elements. + // Second step reduces __work_group_size * __iters_per_work_item_work_group_kernel elements. 
+ else if (__n <= 2097152) + { + return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 4194304) + { + return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 8388608) + { + return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 16777216) + { + return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 33554432) + { + return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + else if (__n <= 67108864) + { + return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( + ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); + } + } + // Otherwise use a recursive tree reduction. 
+ return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( + ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); +} + } // namespace __par_backend_hetero } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h index d94e5fd36e9..aa400692f51 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h @@ -112,6 +112,36 @@ __parallel_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _ return __result; } +template +_Value +__parallel_transform_reduce(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, + _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, + _Value __init, _Combiner __combiner, _Reduction __reduction) +{ + _Value __result = __init; + if (omp_in_parallel()) + { + // We don't create a nested parallel region in an existing parallel + // region: just create tasks + __result = oneapi::dpl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner, + __reduction); + } + else + { + // Create a parallel region, and a single thread will create tasks + // for the region. 
+ _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + __result = oneapi::dpl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, + __combiner, __reduction); + } + } + + return __result; +} + } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index f8bc92c5fc8..654859e7c6e 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -102,6 +102,14 @@ __parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _ return __reduce(__first, __last, __init); } +template +_Tp +__parallel_transform_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, + _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) +{ + return __reduce(__first, __last, __init); +} + template void __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 67db87432e5..fb2339808e3 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -232,6 +232,18 @@ __parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _ return __body.sum(); } +template +_Tp +__parallel_transform_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, + _Index __last, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce) +{ + __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); + // The grain size of 3 is used in order to provide minimum 2 elements for each body + tbb::this_task_arena::isolate( + [__first, __last, &__body]() { tbb::parallel_reduce(tbb::blocked_range<_Index>(__first, __last, 3), 
__body); }); + return __body.sum(); +} + //------------------------------------------------------------------------ // parallel_scan //------------------------------------------------------------------------ From f6af9aeac36450db976090efe8271cdb77fc9d04 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 12:42:24 +0100 Subject: [PATCH 180/566] __parallel_transform_reduce + tag calls --- .../internal/async_impl/async_impl_hetero.h | 11 ++++++++-- include/oneapi/dpl/pstl/numeric_impl.h | 22 ++++++++++++++----- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index cd080b1e6a1..3e91088e92e 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -139,9 +139,13 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterato oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator2>(); auto __buf2 = __keep2(__first2, __first2 + __n); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()); } @@ -166,9 +170,12 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __f auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator>(); 
auto __buf = __keep(__first, __last); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()); } diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 39d9e16242e..dc3eec4045d 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -92,9 +92,13 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { return __binary_op2(*__i, *(__first2 + (__i - __first1))); }, @@ -115,9 +119,11 @@ __pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { + using __backend_tag = typename 
__parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { return __binary_op2(*__i, *(__first2 + (__i - __first1))); }, @@ -188,9 +194,13 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, [__unary_op, __binary_op, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); @@ -201,13 +211,15 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi template _Tp -__pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return 
__par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{}); From 92d385d6e1bba2ea030a1b2dc3d87cac59ecc8f1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 14:25:02 +0100 Subject: [PATCH 181/566] __parallel_for + tag calls --- .../internal/async_impl/async_impl_hetero.h | 26 +++- .../oneapi/dpl/internal/binary_search_impl.h | 18 ++- include/oneapi/dpl/pstl/algorithm_impl.h | 129 ++++++++++++++---- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 73 ++++++---- .../dpl/pstl/hetero/histogram_impl_hetero.h | 2 +- .../dpl/pstl/hetero/numeric_impl_hetero.h | 9 +- include/oneapi/dpl/pstl/numeric_impl.h | 12 +- include/oneapi/dpl/pstl/parallel_impl.h | 28 +++- 8 files changed, 229 insertions(+), 68 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index 3e91088e92e..232957a9b4a 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -42,9 +42,12 @@ __pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forw oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); auto __buf = __keep(__first, __last); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - 
::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf.all_view()); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()); return __future_obj; } @@ -66,9 +69,13 @@ __pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); auto __buf2 = __keep2(__first2, __first2 + __n); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view()); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync::value) __future.wait(); @@ -95,9 +102,14 @@ __pattern_walk3_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view(), __buf3.all_view()); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator3>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + auto __future = + oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, + __buf1.all_view(), __buf2.all_view(), __buf3.all_view()); return __future.__make_future(__first3 + __n); } diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 0c689fe6b8e..3fd155f03a0 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -111,6 +111,10 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -128,7 +132,7 @@ lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(__backend_tag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); @@ -141,6 +145,10 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -158,7 +166,7 @@ upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(__backend_tag{}, ::std::forward(policy), 
custom_brick{comp, size}, value_size, zip_vw) .wait(); @@ -171,6 +179,10 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -188,7 +200,7 @@ binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, In auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(__backend_tag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 705b8d278f6..d32664bfcc6 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -226,8 +226,12 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando _IsVector __is_vector, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__f, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_walk1(__i, __j, __f, __is_vector); }); @@ -272,9 +276,13 @@ __pattern_walk_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _ _Brick __brick, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + 
oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(); __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j, __is_vector); }); @@ -377,9 +385,13 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Brick __brick, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(); return __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i, __is_vector); }); @@ -470,9 +482,13 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< __pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, 
_RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__f, __first1, __first2, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, __is_vector); }); @@ -485,9 +501,11 @@ _RandomAccessIterator2 __pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__f, __first1, __first2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, _IsVector{}); }); @@ -619,11 +637,16 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); return __except_handler([&]() { __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, 
[&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __brick(__i, __j, __first2 + (__i - __first1), __is_vector); @@ -714,11 +737,16 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); return __except_handler([&]() { __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __brick(__i, __j - __i, __first2 + (__i - __first1), __is_vector); @@ -820,8 +848,14 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__f, __first1, __first2, __first3, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, @@ -837,8 +871,11 @@ 
_RandomAccessIterator3 __pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__f, __first1, __first2, __first3](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, @@ -2191,8 +2228,8 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI [&__m](_DifferenceType __total) { __m = __total; }); // 3. Elements from result are moved to [first, last) - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, - [__result, __first, __is_vector](_Tp* __i, _Tp* __j) { + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + __m, [__result, __first, __is_vector](_Tp* __i, _Tp* __j) { __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __result), __is_vector); }); @@ -2493,8 +2530,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_reverse(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, 
[__is_vector, __first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), __is_vector); }); @@ -2505,8 +2546,10 @@ void __pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), _IsVector{}); }); @@ -2559,8 +2602,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_reverse_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __len = __last - __first; - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__is_vector, __first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) { __internal::__brick_reverse_copy(__inner_first, __inner_last, @@ -2661,6 +2708,10 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator 
__first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; auto __n = __last - __first; auto __m = __middle - __first; @@ -2670,19 +2721,19 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { _Tp* __result = __buf.get(); __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, [__middle, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __middle), __is_vector); }); __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, [__last, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __b + (__last - __middle), __is_vector); }); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m), - [__first, __result, __is_vector](_Tp* __b, _Tp* __e) { + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + (__n - __m), [__first, __result, __is_vector](_Tp* __b, _Tp* __e) { __brick_move_destroy<_ExecutionPolicy>{}( __b, __e, __first + (__b - __result), __is_vector); }); @@ -2696,18 +2747,19 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand return 
__internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { _Tp* __result = __buf.get(); __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, [__first, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __first), __is_vector); }); __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, [__first, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __first + (__b - __middle), __is_vector); }); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + __m, [__n, __m, __first, __result, __is_vector](_Tp* __b, _Tp* __e) { __brick_move_destroy<_ExecutionPolicy>{}( __b, __e, __first + ((__n - __m) + (__b - __result)), __is_vector); @@ -2831,7 +2883,12 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __last, __middle, __result, __is_vector](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { __internal::__brick_copy<_ExecutionPolicy> __copy{}; @@ -3212,6 +3269,7 @@ 
__pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else if (__size2 > __size1) { __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, [__val1, __val2, __size1, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), @@ -3223,6 +3281,7 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else { __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, [__val1, __val2, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), __is_vector); @@ -3931,7 +3990,7 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ __n2); // 3. Move elements from temporary buffer to output - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, [__r, __d_first, __is_vector](_T1* __i, _T1* __j) { __brick_move_destroy<_ExecutionPolicy>{}( __i, __j, __d_first + (__i - __r), __is_vector); @@ -3939,7 +3998,7 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ if constexpr (!::std::is_trivially_destructible_v<_T1>) __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, __r + __n1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, __r + __n1, [__is_vector](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, __is_vector); }); return __d_first + __n2; @@ -4305,8 +4364,13 @@ __pattern_fill(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random const _Tp& __value, /*is_parallel=*/::std::true_type, _IsVector __is_vector) { + 
constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&__exec, __first, __last, &__value, __is_vector]() { __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [&__value, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__begin, __end, __is_vector); @@ -4434,8 +4498,12 @@ __pattern_generate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra _Generator __g, /*is_parallel=*/::std::true_type, _IsVector __is_vector) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__g, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { __internal::__brick_generate(__begin, __end, __g, __is_vector); }); @@ -4724,6 +4792,11 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first { return; } + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; auto __n = __last - __first; __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n); @@ -4749,10 +4822,11 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first __move_sequences, __move_sequences); return __f3 + (__l1 - __f1) + (__l2 
- __f2); }); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, [__r, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), __is_vector); - }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, + [__r, __first, __is_vector](_Tp* __i, _Tp* __j) { + __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), + __is_vector); + }); }); } @@ -6434,6 +6508,9 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa typename ::std::iterator_traits<_ForwardIterator>::difference_type __n, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. if (__n <= 0) return __last; @@ -6449,7 +6526,7 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa //1. 
n >= size/2; there is enough memory to 'total' parallel copying if (__n >= __mid) { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __n, __size, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __size, [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, __first + __i - __n, __is_vector); @@ -6461,7 +6538,7 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa for (auto __k = __n; __k < __size; __k += __n) { auto __end = ::std::min(__k + __n, __size); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __k, __end, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __k, __end, [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, __first + __i - __n, __is_vector); diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 52b89025106..7df8d6d7a0d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -52,7 +52,10 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); auto __buf = __keep(__first, __last); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()) .wait(); @@ 
-126,9 +129,13 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); auto __buf2 = __keep2(__first2, __first2 + __n); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view()); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync()) __future_obj.wait(); @@ -155,8 +162,8 @@ __pattern_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIt auto __buf2 = __keep2(__first2, __first2 + __n); auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view()); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync()) __future_obj.wait(); @@ -240,7 +247,11 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode3, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator3>(); + using __backend_tag = typename 
decltype(__dispatch_tag)::__backend_tag; + + oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view(), __buf3.all_view()) .wait(); @@ -269,7 +280,7 @@ __pattern_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIt auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode3, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view(), __buf3.all_view()) .wait(); @@ -2806,10 +2817,13 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last if (__n <= 0) return; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__reverse_functor::difference_type>{__n}, __n / 2, __buf.all_view()) .wait(); @@ -2828,8 +2842,7 @@ __pattern_reverse(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _I auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), 
unseq_backend::__reverse_functor::difference_type>{__n}, __n / 2, __buf.all_view()) .wait(); @@ -2852,9 +2865,14 @@ __pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first auto __buf1 = __keep1(__first, __last); auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __buf2 = __keep2(__result, __result + __n); oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__reverse_copy::difference_type>{__n}, __n, __buf1.all_view(), __buf2.all_view()) .wait(); @@ -2918,16 +2936,20 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_f oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::write>(__temp_buf.get_buffer()); const auto __shift = __new_first - __first; + + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), + __backend_tag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, __buf.all_view(), __temp_rng); using _Function = __brick_move<_ExecutionPolicy>; auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __brick, __n, - __temp_rng, __buf.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), + __brick, __n, __temp_rng, __buf.all_view()) .wait(); return __first + (__last - __new_first); @@ -2938,8 +2960,6 @@ _Iterator __pattern_rotate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, _Iterator __last) { - using __backend_tag = typename decltype(__tag)::__backend_tag; - auto __n = __last - __first; if (__n <= 0) return __first; @@ -2955,14 +2975,14 @@ __pattern_rotate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It const auto __shift = __new_first - __first; oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, __buf.all_view(), __temp_rng); using _Function = __brick_move<_ExecutionPolicy>; auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __brick, __n, __temp_rng, __buf.all_view()) .wait(); @@ -2978,6 +2998,10 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _ForwardIterator __result, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __n = __last - __first; if (__n <= 0) return __result; @@ -2992,6 +3016,7 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, const auto __shift = __new_first - __first; 
oneapi::dpl::__par_backend_hetero::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, @@ -3006,8 +3031,6 @@ _ForwardIterator __pattern_rotate_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __new_first, _BidirectionalIterator __last, _ForwardIterator __result) { - using __backend_tag = typename decltype(__tag)::__backend_tag; - auto __n = __last - __first; if (__n <= 0) return __result; @@ -3022,7 +3045,7 @@ __pattern_rotate_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec const auto __shift = __new_first - __first; oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, __buf1.all_view(), __buf2.all_view()) @@ -3547,6 +3570,9 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__int _DiffType __mid = __size / 2 + __size % 2; _DiffType __size_res = __size - __n; + //TODO: required to implement correct tag selection here + using __backend_tag = oneapi::dpl::__internal::__device_backend_tag; + //1. n >= size/2; 'size - _n' parallel copying if (__n >= __mid) { @@ -3557,14 +3583,15 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__int auto __src = oneapi::dpl::__ranges::drop_view_simple<_Range, _DiffType>(__rng, __n); auto __dst = oneapi::dpl::__ranges::take_view_simple<_Range, _DiffType>(__rng, __size_res); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __brick, __size_res, - __src, __dst) + oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __brick, __size_res, __src, __dst) .wait(); } else //2. 
n < size/2; 'n' parallel copying { auto __brick = unseq_backend::__brick_shift_left<_ExecutionPolicy, _DiffType>{__size, __n}; oneapi::dpl::__par_backend_hetero::__parallel_for( + __backend_tag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__shift_left_right>( ::std::forward<_ExecutionPolicy>(__exec)), __brick, __n, __rng) diff --git a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h index fbdeb161c29..c594e5c25a0 100644 --- a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h @@ -144,7 +144,7 @@ __pattern_histogram(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, //fill histogram bins with zeros auto __init_event = oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__hist_fill_zeros_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__hist_fill_zeros_wrapper>(__exec), unseq_backend::walk_n<_ExecutionPolicy, decltype(__fill_func)>{__fill_func}, __num_bins, __bins); if (__n > 0) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index e1af0f7a73c..3edccc031e6 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -382,6 +382,7 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir return __d_first; const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; using _It1ValueT = typename ::std::iterator_traits<_ForwardIterator1>::value_type; using _It2ValueTRef = typename ::std::iterator_traits<_ForwardIterator2>::reference; @@ -419,8 +420,8 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 
__fir using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; - oneapi::dpl::__par_backend_hetero::__parallel_for(__exec, _Function{__fn}, __n, __buf1.all_view(), - __buf2.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, __exec, _Function{__fn}, __n, + __buf1.all_view(), __buf2.all_view()) .wait(); return __d_last; @@ -474,8 +475,8 @@ __pattern_adjacent_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; - oneapi::dpl::__par_backend_hetero::__parallel_for(__exec, _Function{__fn}, __n, __buf1.all_view(), - __buf2.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, __exec, _Function{__fn}, __n, + __buf1.all_view(), __buf2.all_view()) .wait(); return __d_last; diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index dc3eec4045d..1bf5ec91e9f 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -572,13 +572,17 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 _RandomAccessIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + assert(__first != __last); typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; *__d_first = *__first; __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, [&__op, __is_vector, __d_first, 
__first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { _RandomAccessIterator2 __d_b = __d_first + (__b - __first); __internal::__brick_walk3( @@ -596,13 +600,17 @@ __pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy& _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, _BinaryOperation __op) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + assert(__first != __last); typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; *__d_first = *__first; __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { _RandomAccessIterator2 __d_b = __d_first + (__b - __first); __internal::__brick_walk3( diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index d30fc053cde..57f3fa737e6 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -42,9 +42,12 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick constexpr auto __comp = ::std::conditional_t<_IsFirst::value, __pstl_less, __pstl_greater>{}; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Index>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + ::std::atomic<_DifferenceType> __extremum(__initial_dist); // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, 
__last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of // why using a shared variable scales fairly well in this situation. @@ -112,8 +115,29 @@ template bool __parallel_or(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Index>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + ::std::atomic __found(false); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f, &__found](_Index __i, _Index __j) { + if (!__found.load(::std::memory_order_relaxed) && __f(__i, __j)) + { + __found.store(true, ::std::memory_order_relaxed); + __par_backend::__cancel_execution(); + } + }); + return __found; +} + +template +bool +__parallel_or(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) +{ + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + ::std::atomic __found(false); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__f, &__found](_Index __i, _Index __j) { if (!__found.load(::std::memory_order_relaxed) && __f(__i, __j)) { From dc34c04364ea352f34477f6764af5f6556365774 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 15:04:52 +0100 Subject: [PATCH 182/566] __parallel_find + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 19 ++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 22 +++++++++++++++++++ include/oneapi/dpl/pstl/parallel_impl.h | 7 +++--- 3 files changed, 44 insertions(+), 4 deletions(-) diff 
--git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 6038150b03b..0cc786fa94a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1374,6 +1374,25 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); } +template +_Iterator1 +__parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst) +{ + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf = __keep(__first, __last); + auto __s_keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __s_buf = __s_keep(__s_first, __s_last); + + using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, + __parallel_find_backward_tag>; + return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), + __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); +} + + // Special overload for single sequence cases. // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. 
diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 85f4f0cf528..69228bb3601 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -261,6 +261,18 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last __s_last, __f, __is_first); } +template +_Iterator1 +__parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __s_first, + _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __first, __last, __s_first, + __s_last, __f, __is_first); +} + template oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _Iterator> __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst __is_first) @@ -270,6 +282,16 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, __is_first); } +template +_Iterator +__parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Brick __f, _IsFirst __is_first) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_find( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __first, __last, __f, __is_first); +} + //------------------------------------------------------------------------ // parallel_merge //----------------------------------------------------------------------- diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index 
57f3fa737e6..2d3f327bf53 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -69,12 +69,11 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick return __extremum != __initial_dist ? __first + __extremum : __last; } -template +template _Index -__parallel_find(_Tag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) +__parallel_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, + _IsFirst) { - static_assert(__is_backend_tag_v<_Tag>); - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; From 2f948ac73289b7dcd0a6e4c9a74f6abde2c30229 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 15:06:34 +0100 Subject: [PATCH 183/566] __parallel_find + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 60 ++++++++++----- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 73 ++++++++++++------- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 3 +- 3 files changed, 89 insertions(+), 47 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index d32664bfcc6..607579fcad6 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1350,13 +1350,13 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _R _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? 
+ constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last - __first == __s_last - __s_first) { - // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(); - const bool __res = __internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred); return __res ? __first : __last; @@ -1365,7 +1365,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _R { return __internal::__except_handler([&]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, @@ -1395,8 +1395,7 @@ __pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ { return __internal::__except_handler([&]() { return __internal::__parallel_find( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, _IsVector{}); @@ -1454,9 +1453,13 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _F _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + 
constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__s_first, __s_last, &__pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, __is_vector); }, @@ -1530,13 +1533,13 @@ __pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ran _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last - __first == __s_last - __s_first) { - // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(); - const bool __res = __internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred); return __res ? 
__first : __last; @@ -1545,7 +1548,7 @@ __pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ran { return __internal::__except_handler([&]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, @@ -1631,6 +1634,10 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (static_cast<_Size>(__last - __first) == __count) { const bool __result = !__internal::__pattern_any_of( @@ -1643,7 +1650,7 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra { return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __is_vector]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __count, &__value, __pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, __is_vector); }, @@ -5831,9 +5838,13 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) { + constexpr auto __dispatch_tag = + 
oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, __is_vector); }, @@ -5848,7 +5859,7 @@ __pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R { return __internal::__except_handler([&]() { return __parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, _IsVector{}); }, @@ -6228,10 +6239,14 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Predicate __pred, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__brick_mismatch(__i, __j, __first2 + (__i - 
__first1), __first2 + (__j - __first1), __pred, __is_vector) @@ -6362,13 +6377,18 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat } else { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, + _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _RefType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _RefType2; --__last1; --__last2; auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, &__comp, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__brick_mismatch( __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 7df8d6d7a0d..34707777978 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -991,11 +991,14 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator if (__last - __first < 2) return __last; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; auto __result = __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::zip( 
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)), @@ -1024,7 +1027,7 @@ __pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; auto __result = __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)), @@ -1254,12 +1257,13 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l if (__first == __last || __s_last == __s_first || __last - __first < __s_last - __s_first) return __last; + // TODO is it correct that we check _Iterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last - __first == __s_last - __s_first) { - // TODO is it correct that we check _Iterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - const bool __res = __pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred); return __res ? 
__first : __last; @@ -1269,7 +1273,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -1297,7 +1301,7 @@ __pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -1318,12 +1322,16 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator if (__first == __last || __s_last == __s_first) return __last; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using _Predicate = unseq_backend::first_match_pred<_ExecutionPolicy, _Pred>; // TODO: To check whether it makes sense to iterate over the second sequence in case of // distance(__first, __last) < distance(__s_first, __s_last). 
return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -1344,7 +1352,7 @@ __pattern_find_first_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex // TODO: To check whether it makes sense to iterate over the second sequence in case of // distance(__first, __last) < distance(__s_first, __s_last). return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -1373,12 +1381,13 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las if (__last - __first < __s_last - __s_first) return __last; + // TODO is it correct that we check _Iterator2 in __select_backend ? + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last - __first == __s_last - __s_first) { - // TODO is it correct that we check _Iterator2 in __select_backend ? 
- constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - const bool __res = __pattern_equal( __dispatch_tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __first, @@ -1388,7 +1397,7 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -1398,7 +1407,7 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las template _Iterator1 -__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, +__pattern_search(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__s_last == __s_first) @@ -1417,7 +1426,7 @@ __pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -1465,8 +1474,12 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, 
_Iterator __las } using _Predicate = unseq_backend::n_elem_match_pred<_ExecutionPolicy, _BinaryPredicate, _Tp, _Size>; + + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__pred, __value, __count}, ::std::true_type{}); @@ -1513,14 +1526,18 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __ if (__n <= 0) return ::std::make_pair(__first1, __first2); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, equal_predicate<_Pred>>; auto __first_zip = __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2)); - auto __result = - __par_backend_hetero::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, - _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); + auto __result = __par_backend_hetero::__parallel_find( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, + _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); __n = __result - __first_zip; return ::std::make_pair(__first1 + __n, __first2 + __n); } @@ -1539,9 +1556,9 @@ __pattern_mismatch(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ auto 
__first_zip = __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2)); - auto __result = - __par_backend_hetero::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, - _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); + auto __result = __par_backend_hetero::__parallel_find( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, + _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); __n = __result - __first_zip; return ::std::make_pair(__first1 + __n, __first2 + __n); } @@ -1976,11 +1993,15 @@ __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first if (__last - __first < 2) return __last; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using _Predicate = oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}, ::std::true_type{}); @@ -1998,7 +2019,7 @@ __pattern_is_heap_until(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), 
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}, ::std::true_type{}); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 69228bb3601..64898115e0c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -257,7 +257,8 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last { // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(__exec.__device_policy(), __first, __last, __s_first, + return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __first, __last, __s_first, __s_last, __f, __is_first); } From 5533a8683c167716e475e9a9438f52d46630849b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:47:48 +0100 Subject: [PATCH 184/566] __pattern_mismatch + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 607579fcad6..f2a1ddb0458 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -6260,7 +6260,7 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ template ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> -__pattern_mismatch(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_mismatch(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Predicate __pred) { @@ -6380,7 +6380,6 @@ 
__pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _RefType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _RefType2; @@ -6388,7 +6387,7 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat --__last2; auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, &__comp, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__brick_mismatch( __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), From 48e0a2eb8903f80f3fd5e48249fc30c05d88f491 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 11:55:48 +0100 Subject: [PATCH 185/566] __internal::__parallel_find + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 43 ++++++++---------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index f2a1ddb0458..c759267de54 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1189,14 +1189,12 @@ __pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIt template _ForwardIterator -__pattern_find_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, +__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate 
__pred) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __except_handler([&]() { return __parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__pred](_ForwardIterator __i, _ForwardIterator __j) { return __brick_find_if(__i, __j, __pred, _IsVector{}); }, @@ -1353,7 +1351,6 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _R // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; if (__last - __first == __s_last - __s_first) { @@ -1365,7 +1362,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _R { return __internal::__except_handler([&]() { return __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, @@ -1383,8 +1380,6 @@ __pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - if (__last - __first == __s_last - __s_first) { const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), @@ -1395,7 +1390,7 @@ __pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ { return __internal::__except_handler([&]() { return 
__internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, _IsVector{}); @@ -1455,11 +1450,10 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _F { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; return __internal::__except_handler([&]() { return __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__s_first, __s_last, &__pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, __is_vector); }, @@ -1470,7 +1464,7 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _F template _ForwardIterator1 -__pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first, +__pattern_find_first_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { @@ -1478,7 +1472,7 @@ __pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _F return __internal::__except_handler([&]() { return __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__s_first, __s_last, &__pred](_ForwardIterator1 __i, _ForwardIterator1 __j) { return 
__internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, _IsVector{}); }, @@ -1536,7 +1530,6 @@ __pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ran // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; if (__last - __first == __s_last - __s_first) { @@ -1548,7 +1541,7 @@ __pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ran { return __internal::__except_handler([&]() { return __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, @@ -1566,8 +1559,6 @@ __pattern_search(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ra _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - if (__last - __first == __s_last - __s_first) { const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, @@ -1578,7 +1569,7 @@ __pattern_search(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ra { return __internal::__except_handler([&]() { return __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return 
__internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, _IsVector{}); @@ -1636,7 +1627,6 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; if (static_cast<_Size>(__last - __first) == __count) { @@ -1650,7 +1640,7 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra { return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __is_vector]() { return __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __count, &__value, __pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, __is_vector); }, @@ -1665,8 +1655,6 @@ _RandomAccessIterator __pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - if (static_cast<_Size>(__last - __first) == __count) { const bool __result = @@ -1676,9 +1664,9 @@ __pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ } else { - return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred]() { + return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __tag]() { return __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __count, &__value, 
__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, _IsVector{}); }, @@ -6241,12 +6229,11 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; return __internal::__except_handler([&]() { auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), __pred, __is_vector) @@ -6264,12 +6251,10 @@ __pattern_mismatch(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Predicate __pred) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __internal::__except_handler([&]() { auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), __pred, _IsVector{}) From fff6053a93f5eb8c81557def2f35f1d9913dbcc7 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 15:34:02 +0100 
Subject: [PATCH 186/566] __parallel_reduce + tag impls --- include/oneapi/dpl/pstl/omp/parallel_reduce.h | 27 +++++++++++++++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 15 +++++++++++ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 15 +++++++++++ 3 files changed, 57 insertions(+) diff --git a/include/oneapi/dpl/pstl/omp/parallel_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_reduce.h index beefe09b738..a59a56d374f 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_reduce.h @@ -77,6 +77,33 @@ __parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAcce return __res; } +template +_Value +__parallel_reduce(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Value __identity, _RealBody __real_body, _Reduction __reduction) +{ + // We don't create a nested parallel region in an existing parallel region: + // just create tasks. + if (omp_in_parallel()) + { + return oneapi::dpl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, + __reduction); + } + + // In any case (nested or non-nested) one parallel region is created and only + // one thread creates a set of tasks. 
+ _Value __res = __identity; + + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + __res = + oneapi::dpl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); + } + + return __res; +} + } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 654859e7c6e..c6de6d97784 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -94,6 +94,21 @@ __parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Valu } } +template +_Value +__parallel_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction&) +{ + if (__first == __last) + { + return __identity; + } + else + { + return __real_body(__first, __last, __identity); + } +} + template _Tp __parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index fb2339808e3..77276c469ac 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -154,6 +154,21 @@ __parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Valu }); } +template +_Value +__parallel_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction& __reduction) +{ + return tbb::this_task_arena::isolate([__first, __last, &__identity, &__real_body, &__reduction]() -> _Value { + return tbb::parallel_reduce( + tbb::blocked_range<_Index>(__first, __last), __identity, + [__real_body](const tbb::blocked_range<_Index>& __r, const 
_Value& __value) -> _Value { + return __real_body(__r.begin(), __r.end(), __value); + }, + __reduction); + }); +} + //------------------------------------------------------------------------ // parallel_transform_reduce // From 373b48be9ec156e03ee85aecd08ebae2c6a06bfc Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 15:38:08 +0100 Subject: [PATCH 187/566] __parallel_reduce + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 74 +++++++++++++++++-- .../experimental/internal/for_loop_impl.h | 6 ++ 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index c759267de54..aa4c6b5b4d1 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2082,9 +2082,13 @@ __pattern_count(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando if (__first == __last) return _SizeType(0); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), [__pred, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) -> _SizeType { return __value + __internal::__brick_count(__begin, __end, __pred, __is_vector); }, ::std::plus<_SizeType>()); @@ -2096,6 +2100,8 @@ typename ::std::iterator_traits<_RandomAccessIterator>::difference_type __pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Predicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename 
::std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; //trivial pre-checks @@ -2104,7 +2110,7 @@ __pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcc return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), [__pred](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) -> _SizeType { return __value + __internal::__brick_count(__begin, __end, __pred, _IsVector{}); }, @@ -2157,6 +2163,9 @@ _ForwardIterator __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _CalcMask __calc_mask, _IsVector __is_vector) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename ::std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _Tp; _DifferenceType __n = __last - __first; @@ -2165,7 +2174,7 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI return __internal::__except_handler([&]() { bool* __mask = __mask_buf.get(); _DifferenceType __min = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, [__first, __mask, &__calc_mask, __is_vector](_DifferenceType __i, _DifferenceType __j, _DifferenceType __local_min) -> _DifferenceType { // Create mask @@ -3002,6 +3011,10 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, 
_UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + //trivial pre-checks if (__first == __last) return true; @@ -3039,7 +3052,7 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs const _ReduceType __identity{__not_init, __last}; _ReduceType __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, [&__pred, __combine, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, _ReduceType __value) -> _ReduceType { if (__value.__val == __broken) @@ -3103,6 +3116,10 @@ __pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ if (__first == __last) return true; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { // State of current range: // broken - current range is not partitioned by pred @@ -3136,7 +3153,7 @@ __pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ const _ReduceType __identity{__not_init, __last}; _ReduceType __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, [&__pred, __combine](_RandomAccessIterator __i, _RandomAccessIterator __j, _ReduceType __value) -> _ReduceType { if (__value.__val == __broken) @@ -3235,6 +3252,9 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_partition(_ExecutionPolicy&& __exec, 
_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) @@ -3286,6 +3306,7 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R }; _PartitionRange __result = __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, _PartitionRange __value) -> _PartitionRange { @@ -3360,8 +3381,12 @@ __pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando } }; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + _PartitionRange __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, _PartitionRange __value) -> _PartitionRange { //1. 
serial partition @@ -3452,7 +3477,12 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi } }; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + _PartitionRange __result = __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [&__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, _PartitionRange __value) -> _PartitionRange { @@ -3503,8 +3533,12 @@ __pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, } }; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + _PartitionRange __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [&__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, _PartitionRange __value) -> _PartitionRange { //1. 
serial stable_partition @@ -4122,8 +4156,13 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first if (__last - __first < 2) return __last; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, [__last, __pred, __is_vector, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __value) -> _RandomAccessIterator { @@ -4166,8 +4205,13 @@ __pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R if (__last - __first < 2) return __last; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, [__last, __pred, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __value) -> _RandomAccessIterator { @@ -5992,8 +6036,13 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, if (__last - __first < 2) return __first; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __init) -> 
_RandomAccessIterator { @@ -6023,8 +6072,11 @@ __pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran if (__last - __first < 2) return __first; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __init) -> _RandomAccessIterator { @@ -6098,10 +6150,15 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs if (__last - __first < 2) return ::std::make_pair(__first, __first); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ ::std::make_pair(__last, __last), [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { @@ -6139,10 +6196,13 @@ __pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ if (__last - __first < 2) return ::std::make_pair(__first, __first); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ ::std::make_pair(__last, __last), [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { diff --git 
a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index 170cc0f0d62..35e7433a09f 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -391,6 +391,8 @@ void __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, __single_stride_type, _Rest&&... __rest) { + using __backend_tag = typename decltype(__tag)::__backend_tag; + using __pack_type = __reduction_pack<_Rest...>; // Create an identity pack object, operations are done on copies of it. @@ -398,6 +400,7 @@ __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, oneapi::dpl::__internal::__except_handler([&]() { return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, [__first, __f](_Size __i, _Size __j, __pack_type __value) { const auto __subseq_start = __first + __i; @@ -426,6 +429,8 @@ void __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, _Sp __stride, _Rest&&... __rest) { + using __backend_tag = typename decltype(__tag)::__backend_tag; + using __pack_type = __reduction_pack<_Rest...>; // Create an identity pack object, operations are done on copies of it. 
@@ -433,6 +438,7 @@ __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, oneapi::dpl::__internal::__except_handler([&]() { return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, [__first, __f, __stride](_Size __i, _Size __j, __pack_type __value) { const auto __subseq_start = __first + __i * __stride; From adb4c5ecc63c50ab5f2ceea9091b4d4dec7682fa Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 15:45:49 +0100 Subject: [PATCH 188/566] __parallel_strict_scan + tag impls --- include/oneapi/dpl/pstl/omp/parallel_scan.h | 36 +++++++++++++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 13 ++++++ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 40 +++++++++++++++++++ 3 files changed, 89 insertions(+) diff --git a/include/oneapi/dpl/pstl/omp/parallel_scan.h b/include/oneapi/dpl/pstl/omp/parallel_scan.h index 29c6c77be54..7001d4267b3 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_scan.h @@ -142,6 +142,42 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu } } +template +void +__parallel_strict_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + if (__n <= __default_chunk_size) + { + _Tp __sum = __initial; + if (__n) + { + __sum = __combine(__sum, __reduce(_Index(0), __n)); + } + __apex(__sum); + if (__n) + { + __scan(_Index(0), __n, __initial); + } + return; + } + + if (omp_in_parallel()) + { + oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, + __scan, __apex); + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, + __combine, __scan, __apex); + } + } +} + } // namespace 
__omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index c6de6d97784..6634e692033 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -138,6 +138,19 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu __scan(_Index(0), __n, __initial); } +template +void +__parallel_strict_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + _Tp __sum = __initial; + if (__n) + __sum = __combine(__sum, __reduce(_Index(0), __n)); + __apex(__sum); + if (__n) + __scan(_Index(0), __n, __initial); +} + template _Tp __parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _UnaryOp, _Tp __init, _BinaryOp, _Reduce, _Scan __scan) diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 77276c469ac..98ac5da86cd 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -456,6 +456,46 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu }); } +template +void +__parallel_strict_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + tbb::this_task_arena::isolate([=, &__combine]() { + if (__n > 1) + { + _Index __p = tbb::this_task_arena::max_concurrency(); + const _Index __slack = 4; + _Index __tilesize = (__n - 1) / (__slack * __p) + 1; + _Index __m = (__n - 1) / __tilesize; + __tbb_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m + 1); + _Tp* __r = __buf.get(); + __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, + __combine); + + // When __apex is a no-op and __combine has no 
side effects, a good optimizer + // should be able to eliminate all code between here and __apex. + // Alternatively, provide a default value for __apex that can be + // recognized by metaprogramming that conditionlly executes the following. + size_t __k = __m + 1; + _Tp __t = __r[__k - 1]; + while ((__k &= __k - 1)) + __t = __combine(__r[__k - 1], __t); + __apex(__combine(__initial, __t)); + __tbb_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, + __combine, __scan); + return; + } + // Fewer than 2 elements in sequence, or out of memory. Handle has single block. + _Tp __sum = __initial; + if (__n) + __sum = __combine(__sum, __reduce(_Index(0), __n)); + __apex(__sum); + if (__n) + __scan(_Index(0), __n, __initial); + }); +} + template _Tp __parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce, From a36888f1e160987930732490e1ad767bbfb4a664 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 16:00:02 +0100 Subject: [PATCH 189/566] __parallel_strict_scan + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 31 ++++++++++++++++++++++++ include/oneapi/dpl/pstl/numeric_impl.h | 12 ++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index aa4c6b5b4d1..7c722594610 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1968,6 +1968,10 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ra _RandomAccessIterator2 __result, _UnaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename 
::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) @@ -1977,6 +1981,7 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ra bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), @@ -2003,6 +2008,8 @@ _RandomAccessIterator2 __pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) @@ -2012,6 +2019,7 @@ __pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), @@ -2212,6 +2220,7 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI _DifferenceType __m{}; // 2. 
Elements that doesn't satisfy pred are moved to result __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_count( @@ -2365,6 +2374,10 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator2 __result, _BinaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(2) < __n) @@ -2376,6 +2389,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce _DifferenceType __extra = 0; @@ -2414,6 +2428,8 @@ _RandomAccessIterator2 __pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(2) < __n) @@ -2425,6 +2441,7 @@ __pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( + 
__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce _DifferenceType __extra = 0; @@ -3609,6 +3626,11 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir _RandomAccessIterator2 __out_true, _RandomAccessIterator3 __out_false, _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallelization=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; typedef ::std::pair<_DifferenceType, _DifferenceType> _ReturnType; const _DifferenceType __n = __last - __first; @@ -3620,6 +3642,7 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir bool* __mask = __mask_buf.get(); _ReturnType __m{}; __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), @@ -3648,6 +3671,8 @@ __pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true, _RandomAccessIterator3 __out_false, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; typedef ::std::pair<_DifferenceType, _DifferenceType> _ReturnType; const _DifferenceType __n = __last - __first; @@ -3658,6 +3683,7 @@ __pattern_partition_copy(__parallel_tag<_IsVector>, 
_ExecutionPolicy&& __exec, _ bool* __mask = __mask_buf.get(); _ReturnType __m{}; __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), @@ -5068,6 +5094,10 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _OutputIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; typedef typename ::std::iterator_traits<_OutputIterator>::value_type _T; @@ -5097,6 +5127,7 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar __result + __s.__pos, __is_vector); }; __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, [=](_DifferenceType __i, _DifferenceType __len) { // Reduce //[__b; __e) - a subrange of the first sequence, to reduce diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 1bf5ec91e9f..7a1c05f7af7 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -407,8 +407,14 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { return __result; } + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator, _OutputIterator>(); + using __backend_tag = typename 
decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, @@ -432,7 +438,7 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs template ::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive) { @@ -443,8 +449,12 @@ __pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __e { return __result; } + + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, From 274def37334287edd0366203b299bd681ef2d424 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 16:13:36 +0100 Subject: [PATCH 190/566] __parallel_transform_scan + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 59 +++++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 13 ++++ .../dpl/pstl/omp/parallel_transform_scan.h | 9 +++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 +++ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 12 ++++ 5 files changed, 
101 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 0cc786fa94a..faf435a51ff 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -844,6 +844,65 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 .event()); } +template +auto +__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __in_rng, + _Range2&& __out_rng, ::std::size_t __n, + _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) +{ + using _Type = typename _InitType::__value_type; + + // Next power of 2 greater than or equal to __n + auto __n_uniform = __n; + if ((__n_uniform & (__n_uniform - 1)) != 0) + __n_uniform = oneapi::dpl::__internal::__dpl_bit_floor(__n) << 1; + + // Pessimistically only use half of the memory to take into account memory used by compiled kernel + const ::std::size_t __max_slm_size = + __exec.queue().get_device().template get_info() / 2; + const auto __req_slm_size = sizeof(_Type) * __n_uniform; + + constexpr int __single_group_upper_limit = 16384; + + constexpr bool __can_use_group_scan = unseq_backend::__has_known_identity<_BinaryOperation, _Type>::value; + if constexpr (__can_use_group_scan) + { + if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size) + { + return __parallel_transform_scan_single_group( + std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); + } + } + + // Either we can't use group scan or this input is too big for one workgroup + using _Assigner = unseq_backend::__scan_assigner; + using _NoAssign = unseq_backend::__scan_no_assign; + using _UnaryFunctor = unseq_backend::walk_n<_ExecutionPolicy, _UnaryOperation>; + 
using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; + + _Assigner __assign_op; + _NoAssign __no_assign_op; + _NoOpFunctor __get_data_op; + + return __future( + __parallel_transform_scan_base( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + ::std::forward<_Range2>(__out_rng), __binary_op, __init, + // local scan + unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, + _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, + __assign_op, __get_data_op}, + // scan between groups + unseq_backend::__scan>{ + __binary_op, _NoOpFunctor{}, __no_assign_op, __assign_op, __get_data_op}, + // global scan + unseq_backend::__global_scan_functor<_Inclusive, _BinaryOperation, _InitType>{__binary_op, __init}) + .event()); +} + template struct __invoke_single_group_copy_if { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 64898115e0c..c554011a1a6 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -173,6 +173,19 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 __unary_op, __init, __binary_op, _Inclusive{}); } +template +auto +__parallel_transform_scan(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __in_rng, + _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, + _BinaryOperation __binary_op, _Inclusive) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__in_rng), + ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, 
__binary_op, _Inclusive{}); +} + template = 0> diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h index 98262635d1e..f0093ccab98 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h @@ -34,6 +34,15 @@ __parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up /* __u */, _Tp __i return __scan(_Index(0), __n, __init); } +template +_Tp +__parallel_transform_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __n, _Up /* __u */, + _Tp __init, _Cp /* __combine */, _Rp /* __brick_reduce */, _Sp __scan) +{ + // TODO: parallelize this function. + return __scan(_Index(0), __n, __init); +} + } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 6634e692033..590201fd16d 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -158,6 +158,14 @@ __parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _UnaryOp, _Tp __init, return __scan(_Index(0), __n, __init); } +template +_Tp +__parallel_transform_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _UnaryOp, + _Tp __init, _BinaryOp, _Reduce, _Scan __scan) +{ + return __scan(_Index(0), __n, __init); +} + template void __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 98ac5da86cd..531858c9f67 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -507,6 +507,18 @@ __parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _ return __body.sum(); } +template +_Tp 
+__parallel_transform_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Up __u, + _Tp __init, _Cp __combine, _Rp __brick_reduce, _Sp __scan) +{ + __trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> __body(__u, __init, __combine, __brick_reduce, __scan); + auto __range = tbb::blocked_range<_Index>(0, __n); + tbb::this_task_arena::isolate([__range, &__body]() { tbb::parallel_scan(__range, __body); }); + return __body.sum(); +} + + //------------------------------------------------------------------------ // parallel_stable_sort //------------------------------------------------------------------------ From dbe64de6a7ca5d8fddd2d4490a2b720715c93f52 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 16:15:14 +0100 Subject: [PATCH 191/566] __parallel_transform_scan + tag calls --- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 1 + .../dpl/pstl/hetero/numeric_impl_hetero.h | 24 +++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index c554011a1a6..bc1d53d4cea 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -169,6 +169,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); } diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 3edccc031e6..16521a0835b 100644 --- 
a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -185,6 +185,7 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It return __result; const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; const auto __n = __last - __first; @@ -198,9 +199,9 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result, __result + __n); - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __n, - __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, + __unary_op, __init, __binary_op, _Inclusive{}) .wait(); } else @@ -223,8 +224,9 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It auto __buf2 = __keep2(__first_tmp, __last_tmp); // Run main algorithm and save data into temporary buffer - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__policy, __buf1.all_view(), __buf2.all_view(), - __n, __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__backend_tag{}, __policy, __buf1.all_view(), + __buf2.all_view(), __n, __unary_op, __init, + __binary_op, _Inclusive{}) .wait(); // Move data from temporary buffer into results @@ -263,6 +265,7 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex return __result; const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + 
using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; const auto __n = __last - __first; @@ -276,9 +279,9 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result, __result + __n); - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __n, - __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, + __unary_op, __init, __binary_op, _Inclusive{}) .wait(); } else @@ -301,8 +304,9 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex auto __buf2 = __keep2(__first_tmp, __last_tmp); // Run main algorithm and save data into temporary buffer - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__policy, __buf1.all_view(), __buf2.all_view(), - __n, __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__backend_tag{}, __policy, __buf1.all_view(), + __buf2.all_view(), __n, __unary_op, __init, + __binary_op, _Inclusive{}) .wait(); // Move data from temporary buffer into results From 3cc8b8da44d0845aefdfa385d20e9e1c172d2ec5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 16:20:47 +0100 Subject: [PATCH 192/566] __parallel_stable_sort + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 26 +++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 11 +++++ .../dpl/pstl/omp/parallel_stable_sort.h | 43 +++++++++++++++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 ++++ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 25 +++++++++++ 5 files changed, 113 insertions(+) diff --git 
a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index faf435a51ff..c3e2fe8b483 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2018,6 +2018,18 @@ __parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare, _Pro return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); } + +template , _Compare>::value, + int> = 0> +auto +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Compare, _Proj __proj) +{ + return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); +} #endif template (__exec), ::std::forward<_Range>(__rng), __cmp_f); } +template , _Compare>::value, + int> = 0> +auto +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Compare __comp, _Proj __proj) +{ + auto __cmp_f = [__comp, __proj](const auto& __a, const auto& __b) mutable { + return __comp(__proj(__a), __proj(__b)); + }; + return __parallel_sort_impl(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); +} + //------------------------------------------------------------------------ // parallel_partial_sort - async pattern //----------------------------------------------------------------------- diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index bc1d53d4cea..7f03369d7c4 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -339,6 
+339,17 @@ __parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __com ::std::forward<_Range>(__rng), __comp, __proj); } +template +auto +__parallel_stable_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Compare __comp, _Proj __proj) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), + ::std::forward<_Range>(__rng), __comp, __proj); +} + //------------------------------------------------------------------------ // parallel_partial_sort //----------------------------------------------------------------------- diff --git a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h index 14aa7b7bf04..5c05452a428 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h +++ b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h @@ -162,6 +162,49 @@ __parallel_stable_sort(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator __xs } } +template +void +__parallel_stable_sort(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, + _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + std::size_t __nsort = 0) +{ + auto __count = static_cast(__xe - __xs); + if (__count <= __default_chunk_size || __nsort < __count) + { + __leaf_sort(__xs, __xe, __comp); + return; + } + + // TODO: the partial sort implementation should + // be shared with the other backends. 
+ + if (omp_in_parallel()) + { + if (__count <= __nsort) + { + oneapi::dpl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); + } + else + { + oneapi::dpl::__omp_backend::__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort); + } + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + if (__count <= __nsort) + { + oneapi::dpl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); + } + else + { + oneapi::dpl::__omp_backend::__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort); + } + } +} + + } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 590201fd16d..ef9bdd7dfaf 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -174,6 +174,14 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _Rando __leaf_sort(__first, __last, __comp); } +template +void +__parallel_stable_sort(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort, ::std::size_t = 0) +{ + __leaf_sort(__first, __last, __comp); +} + template void diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 531858c9f67..a7c521ffe84 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1296,6 +1296,31 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAc }); } +template +void +__parallel_stable_sort(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __xs, + _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, ::std::size_t __nsort) +{ + 
tbb::this_task_arena::isolate([=, &__nsort]() { + //sorting based on task tree and parallel merge + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _ValueType; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + const _DifferenceType __n = __xe - __xs; + + const _DifferenceType __sort_cut_off = _ONEDPL_STABLE_SORT_CUT_OFF; + if (__n > __sort_cut_off) + { + __tbb_backend::__buffer<_ExecutionPolicy, _ValueType> __buf(__n); + __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{ + __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()}; + __task::spawn_root_and_wait(__root); + return; + } + //serial sort + __leaf_sort(__xs, __xe, __comp); + }); +} + //------------------------------------------------------------------------ // parallel_merge //------------------------------------------------------------------------ From fc62101e78787cd9bde3b58a67b745896a107816 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 16:35:11 +0100 Subject: [PATCH 193/566] __parallel_stable_sort + tag calls --- .../dpl/internal/async_impl/glue_async_impl.h | 7 +++- include/oneapi/dpl/pstl/algorithm_impl.h | 41 +++++++++++++++++++ .../dpl/pstl/hetero/algorithm_impl_hetero.h | 8 +++- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 3 +- 4 files changed, 54 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index 26eca467131..3cceec2f5cb 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -93,8 +93,11 @@ sort_async(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Comp auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return 
__par_backend_hetero::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), - __comp, oneapi::dpl::identity{}); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + return __par_backend_hetero::__parallel_stable_sort(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __comp, oneapi::dpl::identity{}); } template __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::sort(__first, __last, __comp); @@ -3749,8 +3754,11 @@ __pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcce _RandomAccessIterator __last, _Compare __comp, /*is_move_constructible=*/::std::true_type) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::sort(__first, __last, __comp); @@ -3785,8 +3793,13 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, 
_IsVector /*is_vector*/, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::stable_sort(__first, __last, __comp); @@ -3800,8 +3813,11 @@ void __pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::stable_sort(__first, __last, __comp); @@ -3866,8 +3882,13 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); }); @@ -3890,8 +3911,11 @@ __pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + 
__internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); }); @@ -3929,8 +3953,13 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, if (__n == 0) return; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { if (__n < __end - __begin) @@ -3951,8 +3980,11 @@ __pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ra if (__n == 0) return; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __except_handler([&]() { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { if (__n < __end - __begin) @@ -3998,12 +4030,18 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ { return __d_first; } + + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __n1 = __last - __first; auto __n2 = __d_last - __d_first; return __internal::__except_handler([&]() { if (__n2 >= __n1) { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, [__first, __d_first, __is_vector](_RandomAccessIterator2 __i, 
_RandomAccessIterator2 __j, _Compare __comp) { @@ -4026,6 +4064,7 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ _T1* __r = __buf.get(); __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { _RandomAccessIterator1 __it = __first + (__i - __r); @@ -4080,6 +4119,7 @@ __pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec if (__n2 >= __n1) { __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, [__first, __d_first](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp) { _RandomAccessIterator1 __i1 = __first + (__i - __d_first); @@ -4101,6 +4141,7 @@ __pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec _T1* __r = __buf.get(); __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { _RandomAccessIterator1 __it = __first + (__i - __r); diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 34707777978..e8e4856e3a1 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2225,8 +2225,12 @@ __stable_sort_with_projection(_ExecutionPolicy&& __exec, _Iterator __first, _Ite auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - __par_backend_hetero::__parallel_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __comp, __proj).wait(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = 
typename decltype(__dispatch_tag)::__backend_tag; + + __par_backend_hetero::__parallel_stable_sort(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __comp, __proj) + .wait(); } template diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 7f03369d7c4..0022ba737be 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -335,7 +335,8 @@ auto __parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) { // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_stable_sort(__exec.__device_policy(), + return oneapi::dpl::__par_backend_hetero::__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), ::std::forward<_Range>(__rng), __comp, __proj); } From 579ea0266c22de170907e7148851f9e6ad26f470 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 17:33:10 +0100 Subject: [PATCH 194/566] __pattern_histogram + tag impls --- include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h index c594e5c25a0..491bc107951 100644 --- a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h @@ -120,7 +120,7 @@ struct __hist_fill_zeros_wrapper template void -__pattern_histogram(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, +__pattern_histogram(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _Size __num_bins, _BinHash&& __func, _RandomAccessIterator2 
__histogram_first) { From 1a897d26bb8ab650c41bece91fc9641863109260 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 17:34:21 +0100 Subject: [PATCH 195/566] __pattern_rotate + tag impls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index e8e4856e3a1..5df17e0e396 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2982,7 +2982,7 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_f template _Iterator -__pattern_rotate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, +__pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, _Iterator __last) { auto __n = __last - __first; From 4733da416a2750a68b376cd1be5314f3f07d56b6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 17:34:36 +0100 Subject: [PATCH 196/566] __pattern_rotate_copy + tag impls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 5df17e0e396..1a88c92a564 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3053,7 +3053,7 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, template _ForwardIterator -__pattern_rotate_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, +__pattern_rotate_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __new_first, 
_BidirectionalIterator __last, _ForwardIterator __result) { auto __n = __last - __first; From 2c66c75c8a5321acdf9bdab49e024ce8709d5ef2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 17:48:41 +0100 Subject: [PATCH 197/566] __parallel_merge + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 28 +++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 14 +++++++++ include/oneapi/dpl/pstl/omp/parallel_merge.h | 30 +++++++++++++++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 10 +++++++ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 30 +++++++++++++++++++ 5 files changed, 112 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index c3e2fe8b483..5033f84a629 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1741,6 +1741,34 @@ __parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, } } +template +auto +__parallel_merge(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, + _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) +{ + using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; + + const auto __n = __rng1.size() + __rng2.size(); + if (__n <= std::numeric_limits<::std::uint32_t>::max()) + { + using _wi_index_type = ::std::uint32_t; + using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< + __merge_kernel_name<_CustomName, _wi_index_type>>; + return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + ::std::forward<_Range3>(__rng3), __comp); + } + else + { + using _wi_index_type = ::std::uint64_t; + using _MergeKernel = 
oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< + __merge_kernel_name<_CustomName, _wi_index_type>>; + return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + ::std::forward<_Range3>(__rng3), __comp); + } +} + //----------------------------------------------------------------------- // parallel_sort: general implementation //----------------------------------------------------------------------- diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 0022ba737be..32696ac7ea8 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -325,6 +325,20 @@ __parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, ::std::forward<_Range3>(__rng3), __comp); } +template +auto +__parallel_merge(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, + _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) + -> decltype(oneapi::dpl::__par_backend_hetero::__parallel_merge( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__rng1), + ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp)) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_merge( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__rng1), + ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp); +} + //------------------------------------------------------------------------ // parallel_stable_sort //----------------------------------------------------------------------- diff --git 
a/include/oneapi/dpl/pstl/omp/parallel_merge.h b/include/oneapi/dpl/pstl/omp/parallel_merge.h index 911d4b2643b..a708a502bd4 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_merge.h +++ b/include/oneapi/dpl/pstl/omp/parallel_merge.h @@ -74,7 +74,37 @@ void __parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) +{ + std::size_t __size_x = __xe - __xs; + std::size_t __size_y = __ye - __ys; + + /* + * Run the merge in parallel by chunking it up. Use the smaller range (if any) as the iteration range, and the + * larger range as the search range. + */ + if (omp_in_parallel()) + { + oneapi::dpl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, + __leaf_merge); + } + else + { + _PSTL_PRAGMA(omp parallel) + { + _PSTL_PRAGMA(omp single nowait) + oneapi::dpl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, + __leaf_merge); + } + } +} + +template +void +__parallel_merge(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, + _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, + _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) { std::size_t __size_x = __xe - __xs; std::size_t __size_y = __ye - __ys; diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index ef9bdd7dfaf..6f69ef11416 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -192,6 +192,16 @@ __parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __first1, _RandomAcc __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); } +template +void 
+__parallel_merge(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __outit, _Compare __comp, _LeafMerge __leaf_merge) +{ + __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); +} + template void __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index a7c521ffe84..bc62641c9f7 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1416,6 +1416,36 @@ __parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessI } } +template +void +__parallel_merge(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, + _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, + _LeafMerge __leaf_merge) +{ + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; + typedef typename ::std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; + typedef typename ::std::common_type_t<_DifferenceType1, _DifferenceType2> _SizeType; + const _SizeType __n = (__xe - __xs) + (__ye - __ys); + const _SizeType __merge_cut_off = _ONEDPL_MERGE_CUT_OFF; + if (__n <= __merge_cut_off) + { + // Fall back on serial merge + __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); + } + else + { + tbb::this_task_arena::isolate([=]() { + typedef __merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, + _Compare, _LeafMerge> + _TaskType; + __root_task<_TaskType> __root{__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge}; + __task::spawn_root_and_wait(__root); + }); + } +} + 
//------------------------------------------------------------------------ // parallel_invoke //------------------------------------------------------------------------ From cd4017313ff692e720900a1f6944d5d371191a5c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 17:49:47 +0100 Subject: [PATCH 198/566] __parallel_merge + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 11 +++++++++++ .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 5 +++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 260f1381d12..cd4d4eea729 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4825,7 +4825,13 @@ __pattern_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __d_first, _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __par_backend::__parallel_merge( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, [__is_vector](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) { @@ -4841,7 +4847,10 @@ __pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcc _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __d_first, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __par_backend::__parallel_merge( + 
__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, @@ -4920,6 +4929,7 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first }; __par_backend::__parallel_merge( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, @@ -4965,6 +4975,7 @@ __pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R }; __par_backend::__parallel_merge( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 1a88c92a564..1e1b026f34a 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2075,6 +2075,7 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las return __d_first; const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2, _Iterator3>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; //To consider the direct copying pattern call in case just one of sequences is empty. 
if (__n1 == 0) @@ -2099,7 +2100,7 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); auto __buf3 = __keep3(__d_first, __d_first + __n); - __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), + __par_backend_hetero::__parallel_merge(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __buf3.all_view(), __comp) .wait(); } @@ -2141,7 +2142,7 @@ __pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); auto __buf3 = __keep3(__d_first, __d_first + __n); - __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), + __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __buf3.all_view(), __comp) .wait(); } From 3907f9960a789f56c2ef85901800a6bbe04ded9c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 18:01:39 +0100 Subject: [PATCH 199/566] __parallel_invoke + tag impls --- include/oneapi/dpl/pstl/omp/parallel_invoke.h | 16 ++++++++++++++++ .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 ++++++++ include/oneapi/dpl/pstl/parallel_backend_tbb.h | 9 +++++++++ 3 files changed, 33 insertions(+) diff --git a/include/oneapi/dpl/pstl/omp/parallel_invoke.h b/include/oneapi/dpl/pstl/omp/parallel_invoke.h index 32491ab9dfd..2520526a191 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_invoke.h +++ b/include/oneapi/dpl/pstl/omp/parallel_invoke.h @@ -52,6 +52,22 @@ __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) } } +template +void +__parallel_invoke(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + if 
(omp_in_parallel()) + { + oneapi::dpl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + oneapi::dpl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); + } +} + } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 6f69ef11416..264947fe550 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -210,6 +210,14 @@ __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) ::std::forward<_F2>(__f2)(); } +template +void +__parallel_invoke(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + ::std::forward<_F1>(__f1)(); + ::std::forward<_F2>(__f2)(); +} + template void __parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index bc62641c9f7..39caf6cbc21 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1458,6 +1458,15 @@ __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) [&]() { tbb::parallel_invoke(::std::forward<_F1>(__f1), ::std::forward<_F2>(__f2)); }); } +template +void +__parallel_invoke(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + //TODO: a version of tbb::this_task_arena::isolate with variadic arguments pack should be added in the future + tbb::this_task_arena::isolate( + [&]() { tbb::parallel_invoke(::std::forward<_F1>(__f1), ::std::forward<_F2>(__f2)); }); +} + //------------------------------------------------------------------------ // parallel_for_each 
//------------------------------------------------------------------------ From 9eeadbceb5878dd6a5ab5a912916523a7a2902c1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Wed, 14 Feb 2024 18:01:56 +0100 Subject: [PATCH 200/566] __parallel_invoke + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index cd4d4eea729..26612aa8ae8 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5245,6 +5245,7 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -5268,7 +5269,7 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ { //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, __copy_range); @@ -5287,7 +5288,7 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ { //{2} < {1}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), 
__first2, __last2, __result, __copy_range); @@ -5305,7 +5306,7 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __res_or = __result; __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first1; left_bound_seq_1) [=] { __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, @@ -5327,7 +5328,7 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __res_or = __result; __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first2; left_bound_seq_2) [=] { __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, From 1222137d6b6219a00d44020034377850e7e7799c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 09:43:51 +0100 Subject: [PATCH 201/566] __parallel_partial_sort + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 16 ++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 5033f84a629..f0283088c2b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2110,6 +2110,22 @@ __parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __partial_merge_kernel{__mid_idx}, __comp); } +// TODO: check if it makes sense to move these wrappers out of backend to a common place 
+// TODO: consider changing __partial_merge_kernel to make it compatible with +// __full_merge_kernel in order to use __parallel_sort_impl routine +template +auto +__parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __mid, _Iterator __last, _Compare __comp) +{ + const auto __mid_idx = __mid - __first; + + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __buf = __keep(__first, __last); + + return __parallel_partial_sort_impl(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), + __partial_merge_kernel{__mid_idx}, __comp); +} } // namespace __par_backend_hetero } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 32696ac7ea8..a82bb3df858 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -381,6 +381,16 @@ __parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __comp); } +template +auto +__parallel_partial_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __mid, _Iterator __last, _Compare __comp) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_partial_sort( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __first, __mid, __last, __comp); +} + //------------------------------------------------------------------------ // parallel_histogram //----------------------------------------------------------------------- From 3c699f7ce2a291c31fa819a311f1838b7795f394 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 09:46:18 +0100 Subject: [PATCH 202/566] 
__parallel_partial_sort + tag calls --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 17 +++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 1e1b026f34a..b26d51e8958 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2591,7 +2591,11 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator _ if (__last - __first < 2) return; + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + __par_backend_hetero::__parallel_partial_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), @@ -2608,6 +2612,7 @@ __pattern_partial_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exe return; __par_backend_hetero::__parallel_partial_sort( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), @@ -2707,7 +2712,13 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI auto __buf_mid = __buf_first + __out_size; + constexpr auto __dispatch_tag11 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), + decltype(__buf_last)>(); + using __backend_tag11 = typename decltype(__dispatch_tag11)::__backend_tag; + __par_backend_hetero::__parallel_partial_sort( + __backend_tag11{}, 
__par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), @@ -2780,7 +2791,13 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& auto __buf_mid = __buf_first + __out_size; + constexpr auto __dispatch_tag11 = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), + decltype(__buf_last)>(); + using __backend_tag11 = typename decltype(__dispatch_tag11)::__backend_tag; + __par_backend_hetero::__parallel_partial_sort( + __backend_tag11{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index a82bb3df858..cb7de2993fc 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -377,8 +377,8 @@ __parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator _Compare __comp) { // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_partial_sort(__exec.__device_policy(), __first, __mid, __last, - __comp); + return oneapi::dpl::__par_backend_hetero::__parallel_partial_sort( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __first, __mid, __last, __comp); } template From e5c113ef4b1d86f6e69adfeae5ccf65c5283385a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 09:47:31 +0100 Subject: [PATCH 203/566] 
__pattern_partial_sort + tag impls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index b26d51e8958..970f6e12f3c 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2604,7 +2604,7 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator _ } template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> +void __pattern_partial_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp) { From 47a3f5cb3ec03743c51c04a171d4fd83c216989b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 10:10:44 +0100 Subject: [PATCH 204/566] __parallel_scan_copy + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 40 +++++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 14 +++++++ 2 files changed, 54 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index f0283088c2b..876fc496bb6 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -984,6 +984,46 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o __copy_by_mask_op); } +template +auto +__parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, + _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +{ + using _ReduceOp = ::std::plus<_Size>; + using _Assigner = unseq_backend::__scan_assigner; + using _NoAssign = unseq_backend::__scan_no_assign; + using _MaskAssigner = 
unseq_backend::__mask_assigner<1>; + using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; + using _InitType = unseq_backend::__no_init_value<_Size>; + + _Assigner __assign_op; + _ReduceOp __reduce_op; + _DataAcc __get_data_op; + _MaskAssigner __add_mask_op; + + // temporary buffer to store boolean mask + oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); + + return __parallel_transform_scan_base( + ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__ranges::make_zip_view( + ::std::forward<_InRng>(__in_rng), + oneapi::dpl::__ranges::all_view( + __mask_buf.get_buffer())), + ::std::forward<_OutRng>(__out_rng), __reduce_op, _InitType{}, + // local scan + unseq_backend::__scan{__reduce_op, __get_data_op, __assign_op, + __add_mask_op, __create_mask_op}, + // scan between groups + unseq_backend::__scan{__reduce_op, __get_data_op, _NoAssign{}, __assign_op, + __get_data_op}, + // global scan + __copy_by_mask_op); +} + template = 0> auto diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index cb7de2993fc..78273a74876 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -224,6 +224,20 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o __create_mask_op, __copy_by_mask_op); } +template +auto +__parallel_scan_copy(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, + _OutRng&& __out_rng, _Size __n, + _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_scan_copy( + oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), 
::std::forward<_OutRng>(__out_rng), __n, + __create_mask_op, __copy_by_mask_op); +} + //------------------------------------------------------------------------ // __parallel_find_or //----------------------------------------------------------------------- From 480a31594b1d84884f9a18624df405afdfc136dc Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 10:11:22 +0100 Subject: [PATCH 205/566] __parallel_scan_copy + tag calls --- .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 14 +++++++++----- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 3 ++- .../pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 970f6e12f3c..ac5c5984511 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1574,6 +1574,10 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy< __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _IteratorOrTuple>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; if (__first == __last) @@ -1587,8 +1591,8 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __ oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); auto __buf2 = __keep2(__output_first, __output_first + __n); - auto __res = - __par_backend_hetero::__parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), + auto __res = 
__par_backend_hetero::__parallel_scan_copy(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __create_mask_op, __copy_by_mask_op); ::std::size_t __num_copied = __res.get(); @@ -1614,9 +1618,9 @@ __pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Itera oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); auto __buf2 = __keep2(__output_first, __output_first + __n); - auto __res = - __par_backend_hetero::__parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __n, __create_mask_op, __copy_by_mask_op); + auto __res = __par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __create_mask_op, + __copy_by_mask_op); ::std::size_t __num_copied = __res.get(); return ::std::make_pair(__output_first + __n, __num_copied); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 876fc496bb6..c641712a7f3 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1062,7 +1062,8 @@ __parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 1>; - return __parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), + return __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, CreateOp{__pred}, CopyOp{}); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 78273a74876..67099e7e041 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -220,6 +220,7 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_scan_copy( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __create_mask_op, __copy_by_mask_op); } From 057e6ec540b230b6dc6d92e1a18948fceec0a62e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 10:28:09 +0100 Subject: [PATCH 206/566] __parallel_copy_if + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 44 +++++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 11 +++++ 2 files changed, 55 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index c641712a7f3..dc104106485 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1068,6 +1068,50 @@ __parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out } } +template +auto +__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, + _OutRng&& __out_rng, _Size __n, _Pred __pred) +{ + using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; + + // Next power of 2 greater than or equal to __n + auto __n_uniform = ::oneapi::dpl::__internal::__dpl_bit_ceil(static_cast<::std::make_unsigned_t<_Size>>(__n)); + + // Pessimistically only use half of the memory to take into account memory used by compiled kernel + const ::std::size_t 
__max_slm_size = + __exec.queue().get_device().template get_info() / 2; + + // The kernel stores n integers for the predicate and another n integers for the offsets + const auto __req_slm_size = sizeof(::std::uint16_t) * __n_uniform * 2; + + constexpr ::std::uint16_t __single_group_upper_limit = 16384; + + ::std::size_t __max_wg_size = oneapi::dpl::__internal::__max_work_group_size(__exec); + + if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size && + __max_wg_size >= _SingleGroupInvoker::__targeted_wg_size) + { + using _SizeBreakpoints = + ::std::integer_sequence<::std::uint16_t, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384>; + + return __par_backend_hetero::__static_monotonic_dispatcher<_SizeBreakpoints>::__dispatch( + _SingleGroupInvoker{}, __n, ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::forward<_InRng>(__in_rng), + ::std::forward<_OutRng>(__out_rng), __pred); + } + else + { + using _ReduceOp = ::std::plus<_Size>; + using CreateOp = unseq_backend::__create_mask<_Pred, _Size>; + using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, + /*inclusive*/ ::std::true_type, 1>; + + return __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), + ::std::forward<_OutRng>(__out_rng), __n, CreateOp{__pred}, CopyOp{}); + } +} + //------------------------------------------------------------------------ // find_or tags //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 67099e7e041..9376a5ff978 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -211,6 +211,17 @@ __parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, 
_OutRng&& __out __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __pred); } +template +auto +__parallel_copy_if(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, + _OutRng&& __out_rng, _Size __n, _Pred __pred) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_copy_if( + oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __pred); +} + template = 0> From e25607832f01731e48dba3174cf73b9d3e6a0d36 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 10:29:15 +0100 Subject: [PATCH 207/566] __parallel_copy_if + tag calls --- .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 12 ++++++++---- .../pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index ac5c5984511..fe3f635a07e 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1631,6 +1631,10 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result_first, _Predicate __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; if (__first == __last) @@ -1643,8 +1647,8 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __la auto 
__keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result_first, __result_first + __n); - auto __res = __par_backend_hetero::__parallel_copy_if(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __n, __pred); + auto __res = __par_backend_hetero::__parallel_copy_if(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __pred); ::std::size_t __num_copied = __res.get(); return __result_first + __num_copied; @@ -1668,8 +1672,8 @@ __pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _I auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result_first, __result_first + __n); - auto __res = __par_backend_hetero::__parallel_copy_if(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __n, __pred); + auto __res = __par_backend_hetero::__parallel_copy_if(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __pred); ::std::size_t __num_copied = __res.get(); return __result_first + __num_copied; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 9376a5ff978..88637fb9b7b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -208,6 +208,7 @@ __parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_copy_if( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __pred); } From 
6702cc6b4cdfeb451664973a2b5ae6e4a5856e64 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 10:50:03 +0100 Subject: [PATCH 208/566] __parallel_find_or + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 88637fb9b7b..34f269a93d0 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -266,6 +266,18 @@ __parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, ::std::forward<_Ranges>(__rngs)...); } +template +::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, + oneapi::dpl::__internal::__difference_t< + typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>> +__parallel_find_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Brick __f, + _BrickTag __brick_tag, _Ranges&&... 
__rngs) +{ + return oneapi::dpl::__par_backend_hetero::__parallel_find_or(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __f, __brick_tag, + ::std::forward<_Ranges>(__rngs)...); +} + //------------------------------------------------------------------------ // parallel_or //----------------------------------------------------------------------- From f0353cac438c6f16dbed5b122211ed0df4140dc2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 10:50:49 +0100 Subject: [PATCH 209/566] __parallel_find_or + tag calls --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 19 +++++++++++++++++-- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 19 +++++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 3 ++- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index fe3f635a07e..ac0f22ebe10 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -931,6 +931,9 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type, oneapi::dpl::__internal::__or_semantic) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last - __first < 2) return __last; @@ -945,8 +948,8 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() bool result = __par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, - __par_backend_hetero::__parallel_or_tag{}, + __backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), + _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); // inverted conditional because of @@ -973,6 +976,7 @@ __pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() bool result = __par_backend_hetero::__parallel_find_or( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); @@ -1113,6 +1117,9 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, b __pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__first == __last) return false; @@ -1122,6 +1129,7 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, __par_backend_hetero::__parallel_or_tag{}, __buf.all_view()); @@ -1141,6 +1149,7 @@ __pattern_any_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, 
__par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, __par_backend_hetero::__parallel_or_tag{}, __buf.all_view()); @@ -1156,6 +1165,10 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las _Iterator2 __last2, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last1 == __first1 || __last2 == __first2 || __last1 - __first1 != __last2 - __first2) return false; @@ -1169,6 +1182,7 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !__par_backend_hetero::__parallel_find_or( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); @@ -1192,6 +1206,7 @@ __pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !__par_backend_hetero::__parallel_find_or( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index dc104106485..9a1a7ecd3f6 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1466,12 +1466,17 @@ oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, b __parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); auto __s_keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); auto __s_buf = __s_keep(__s_first, __s_last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag{}, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); } @@ -1483,10 +1488,14 @@ template oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, bool> __parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag{}, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view()); } @@ -1505,6 +1514,10 @@ oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, _ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 
__first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); auto __s_keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); @@ -1513,6 +1526,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag{}, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); @@ -1531,6 +1545,7 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy& using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); @@ -1544,12 +1559,16 @@ template __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, 
__last); using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag{}, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view()); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 34f269a93d0..18b4d00b627 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -262,7 +262,8 @@ oneapi::dpl::__internal::__enable_if_fpga_execution_policy< typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>>> __parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... __rngs) { - return oneapi::dpl::__par_backend_hetero::__parallel_find_or(__exec.__device_policy(), __f, __brick_tag, + return oneapi::dpl::__par_backend_hetero::__parallel_find_or(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __f, __brick_tag, ::std::forward<_Ranges>(__rngs)...); } From 500fe4d314d57a40fa49130f6d688faa5f02ffbf Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 11:05:12 +0100 Subject: [PATCH 210/566] __parallel_or + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 33 +++++++++++++++++++ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 21 ++++++++++++ 2 files changed, 54 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 9a1a7ecd3f6..22a07100d96 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1481,6 +1481,22 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator1 
__first, _Iterator1 __last, __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); } +template +bool +__parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) +{ + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf = __keep(__first, __last); + auto __s_keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __s_buf = __s_keep(__s_first, __s_last); + + return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, + __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, + __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); +} + // Special overload for single sequence cases. // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. @@ -1500,6 +1516,23 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _B __parallel_or_tag{}, __buf.all_view()); } +// Special overload for single sequence cases. +// TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of +// backend code. 
+template +bool +__parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Brick __f) +{ + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __buf = __keep(__first, __last); + + return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, + __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, + __parallel_or_tag{}, __buf.all_view()); +} + //------------------------------------------------------------------------ // parallel_find - sync pattern //----------------------------------------------------------------------- diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 18b4d00b627..b6950c6f75c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -292,6 +292,17 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, __s_last, __f); } +template +bool +__parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_or(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __first, __last, __s_first, + __s_last, __f); +} + template oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, bool> __parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) @@ -300,6 +311,16 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _B return 
oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __f); } +template +bool +__parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Brick __f) +{ + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_or(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __first, __last, __f); +} + //------------------------------------------------------------------------ // parallel_find //----------------------------------------------------------------------- From 1284178d0eeb98e9135914d095a73910ae6230c0 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 11:08:45 +0100 Subject: [PATCH 211/566] __parallel_or + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 46 ++++++++++++++++--- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 12 +++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 26612aa8ae8..f9ffbc6ac7d 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -85,8 +85,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Pred __pred, _IsVector __is_vector, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { - return __internal::__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return __internal::__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__pred, 
__is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__brick_any_of(__i, __j, __pred, __is_vector); }); @@ -98,8 +102,10 @@ bool __pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Pred __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { - return __internal::__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return __internal::__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__brick_any_of(__i, __j, __pred, _IsVector{}); }); @@ -1050,12 +1056,16 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _BinaryPredicate __p, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last1 - __first1 != __last2 - __first2) return false; return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), __p, __is_vector); @@ -1070,11 +1080,14 @@ __pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcc _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _BinaryPredicate __p) { + using __backend_tag = 
typename __parallel_tag<_IsVector>::__backend_tag; + if (__last1 - __first1 != __last2 - __first2) return false; return __internal::__except_handler([&]() { return !__internal::__parallel_or( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), @@ -1131,8 +1144,13 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran _RandomAccessIterator2 __first2, _BinaryPredicate __p, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { return !__internal::__parallel_or( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector); @@ -1146,9 +1164,11 @@ bool __pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _BinaryPredicate __p) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, _IsVector{}); }); @@ -5021,6 +5041,10 @@ 
__pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp, _IsVector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__first2 == __last2) return true; @@ -5041,6 +5065,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ return __internal::__except_handler([&]() { return !__internal::__parallel_or( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { assert(__j > __i); @@ -5082,6 +5107,8 @@ __pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Random _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__first2 == __last2) return true; @@ -5102,6 +5129,7 @@ __pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Random return __internal::__except_handler([&]() { return !__internal::__parallel_or( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { assert(__j > __i); @@ -6047,8 +6075,12 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, boo __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using 
__backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { - return !__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return !__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, __is_vector); @@ -6061,8 +6093,10 @@ bool __pattern_is_heap(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { - return !__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return !__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, _IsVector{}); diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index ac0f22ebe10..c6e23ae6808 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2053,6 +2053,10 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, b __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, /* vector */ ::std::true_type, /* parallel = */ ::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + if (__last - __first < 2) return true; @@ -2060,6 +2064,7 @@ 
__pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return !__par_backend_hetero::__parallel_or( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); @@ -2077,6 +2082,7 @@ __pattern_is_heap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return !__par_backend_hetero::__parallel_or( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); @@ -2553,6 +2559,10 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + //according to the spec if (__first2 == __last2) return true; @@ -2567,6 +2577,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa using __brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; return !__par_backend_hetero::__parallel_or( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), @@ -2595,6 
+2606,7 @@ __pattern_includes(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ using __brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; return !__par_backend_hetero::__parallel_or( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), From 274acc73fc2d323441c927d85c4510513514cff1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:02:11 +0100 Subject: [PATCH 212/566] __internal::__parallel_or + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 45 ++++++++---------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index f9ffbc6ac7d..7353936271e 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -87,10 +87,9 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; return __internal::__except_handler([&]() { - return __internal::__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return __internal::__parallel_or(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__brick_any_of(__i, __j, __pred, __is_vector); }); @@ -99,13 +98,11 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand template bool -__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_any_of(__parallel_tag<_IsVector> 
__tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Pred __pred) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __internal::__except_handler([&]() { - return __internal::__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return __internal::__parallel_or(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { return __internal::__brick_any_of(__i, __j, __pred, _IsVector{}); }); @@ -1058,14 +1055,13 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; if (__last1 - __first1 != __last2 - __first2) return false; return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), __p, __is_vector); @@ -1076,18 +1072,16 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran template bool -__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _BinaryPredicate __p) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - if (__last1 - __first1 != __last2 - __first2) 
return false; return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __backend_tag{}, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), @@ -1146,11 +1140,10 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __backend_tag{}, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector); @@ -1161,14 +1154,12 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran template bool -__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _BinaryPredicate __p) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, _IsVector{}); }); @@ -5043,7 +5034,6 @@ 
__pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; if (__first2 == __last2) return true; @@ -5065,7 +5055,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __backend_tag{}, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { assert(__j > __i); @@ -5103,12 +5093,10 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ template bool -__pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - if (__first2 == __last2) return true; @@ -5129,7 +5117,7 @@ __pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Random return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __backend_tag{}, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { assert(__j > __i); @@ -6077,10 +6065,9 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename 
decltype(__dispatch_tag)::__backend_tag; return __internal::__except_handler([&]() { - return !__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return !__parallel_or(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, __is_vector); @@ -6090,13 +6077,11 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran template bool -__pattern_is_heap(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_is_heap(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __internal::__except_handler([&]() { - return !__parallel_or(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + return !__parallel_or(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, _IsVector{}); From 553f101046c18cdb6d4166576d4d32046d0fa709 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:24:08 +0100 Subject: [PATCH 213/566] __pattern_set_intersection + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 7353936271e..77365909771 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5588,7 +5588,7 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f template _RandomAccessIterator3 
-__pattern_set_intersection(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { From f1179ba38f33cffdfa1f4a209c7292f1df60d51f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:19:40 +0100 Subject: [PATCH 214/566] __internal::__parallel_set_op + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 94 ++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 77365909771..158f65791d9 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5250,6 +5250,100 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar }); } +template +_OutputIterator +__parallel_set_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + _SizeFunction __size_func, _SetOP __set_op) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + typedef typename ::std::iterator_traits<_OutputIterator>::value_type _T; + + struct _SetRange + { + _DifferenceType __pos, __len, __buf_pos; + bool + empty() const + { + return __len == 0; + } + }; + + const _DifferenceType __n1 = __last1 - __first1; + const _DifferenceType __n2 = __last2 - __first2; + + __par_backend::__buffer<_ExecutionPolicy, _T> __buf(__size_func(__n1, __n2)); + + return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, + 
__comp, __size_func, __set_op, &__buf]() { + auto __tmp_memory = __buf.get(); + _DifferenceType __m{}; + auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan + if (!__s.empty()) + __brick_move_destroy<_ExecutionPolicy>{}(__tmp_memory + __s.__buf_pos, + __tmp_memory + (__s.__buf_pos + __s.__len), + __result + __s.__pos, _IsVector{}); + }; + __par_backend::__parallel_strict_scan( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + //[__b; __e) - a subrange of the first sequence, to reduce + _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); + + //try searching for the first element which not equal to *__b + if (__b != __first1) + __b = ::std::upper_bound(__b, __last1, *__b, __comp); + + //try searching for the first element which not equal to *__e + if (__e != __last1) + __e = ::std::upper_bound(__e, __last1, *__e, __comp); + + //check is [__b; __e) empty + if (__e - __b < 1) + { + _ForwardIterator2 __bb = __last2; + if (__b != __last1) + __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); + + const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2)); + return _SetRange{0, 0, __buf_pos}; + } + + //try searching for "corresponding" subrange [__bb; __ee) in the second sequence + _ForwardIterator2 __bb = __first2; + if (__b != __first1) + __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); + + _ForwardIterator2 __ee = __last2; + if (__e != __last1) + __ee = ::std::lower_bound(__bb, __last2, *__e, __comp); + + const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2)); + auto __buffer_b = __tmp_memory + __buf_pos; + auto __res = __set_op(__b, __e, __bb, __ee, __buffer_b, __comp); + + return _SetRange{0, __res - __buffer_b, __buf_pos}; + }, + [](const _SetRange& __a, const _SetRange& __b) { // Combine + if (__b.__buf_pos > __a.__buf_pos || 
((__b.__buf_pos == __a.__buf_pos) && !__b.empty())) + return _SetRange{__a.__pos + __a.__len + __b.__pos, __b.__len, __b.__buf_pos}; + return _SetRange{__b.__pos + __b.__len + __a.__pos, __a.__len, __a.__buf_pos}; + }, + __scan, // Scan + [&__m, &__scan](const _SetRange& __total) { // Apex + //final scan + __scan(0, 0, __total); + __m = __total.__pos + __total.__len; + }); + return __result + __m; + }); +} + //a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' template From 96863214ec59393431d690e6d1df29c033029bdd Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:23:01 +0100 Subject: [PATCH 215/566] __internal::__parallel_set_op + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 43 ++++++++++++++---------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 158f65791d9..db217b7b0a4 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5424,9 +5424,9 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ }, [=, &__result] { __result = __internal::__parallel_set_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, - __is_vector); + __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); }); return __result; } @@ -5446,16 +5446,17 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ }, [=, &__result] { __result = __internal::__parallel_set_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, - __is_vector); + __comp, 
[](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); }); return __result; } return __internal::__parallel_set_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, __is_vector); + [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); } //------------------------------------------------------------------------ @@ -5622,6 +5623,10 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3>(); + typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; @@ -5649,14 +5654,14 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f { //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) return __internal::__parallel_set_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, __result, __comp); - }, - __is_vector); + }); } const auto __m2 = __last2 - __left_bound_seq_2 + __n1; @@ -5664,14 +5669,14 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f { //we know 
proper offset due to [first2; left_bound_seq_2) < [first1; last1) __result = __internal::__parallel_set_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, __result, __comp); - }, - __is_vector); + }); return __result; } @@ -5713,14 +5718,14 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ { //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) return __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, __result, __comp); - }, - _IsVector{}); + }); } const auto __m2 = __last2 - __left_bound_seq_2 + __n1; @@ -5728,14 +5733,14 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ { //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) __result = __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare 
__comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, __result, __comp); - }, - _IsVector{}); + }); return __result; } @@ -5800,7 +5805,9 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3>(); const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -5830,14 +5837,14 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); // use serial algorithm return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); @@ -5882,14 +5889,14 @@ __pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __e if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, 
[](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - _IsVector{}); + }); // use serial algorithm return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); From c6998fa5b12b0a5101a560087765e098f896dcd4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:31:37 +0100 Subject: [PATCH 216/566] __pattern_set_union + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index db217b7b0a4..8ff4c73d8e6 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5546,7 +5546,7 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, template _OutputIterator -__pattern_set_union(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_set_union(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _OutputIterator __result, _Compare __comp) { @@ -5980,7 +5980,7 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessItera template _RandomAccessIterator3 -__pattern_set_symmetric_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, +__pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) From 
a2b414e70b632472d3d988e5f51d1b8c05063aab Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:29:56 +0100 Subject: [PATCH 217/566] __internal::__parallel_set_union_op + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 113 +++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 8ff4c73d8e6..796c743056c 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5459,6 +5459,119 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); } +//a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' +template +_OutputIterator +__parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp, _SetUnionOp __set_union_op) +{ + typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + + using __backend_tag = typename decltype(__tag)::__backend_tag; + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + __brick_copy<_ExecutionPolicy> __copy_range{}; + + // {1} {}: parallel copying just first sequence + if (__n2 == 0) + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result, __copy_range); + + // {} {2}: parallel copying justmake second sequence + if (__n1 == 0) + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result, __copy_range); + + // testing whether the sequences are intersected + _ForwardIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + + 
if (__left_bound_seq_1 == __last1) + { + //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 + __par_backend::__parallel_invoke( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + [=] { + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result, __copy_range); + }, + [=] { + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result + __n1, __copy_range); + }); + return __result + __n1 + __n2; + } + + // testing whether the sequences are intersected + _ForwardIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + + if (__left_bound_seq_2 == __last2) + { + //{2} < {1}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 + __par_backend::__parallel_invoke( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + [=] { + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result, __copy_range); + }, + [=] { + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result + __n2, __copy_range); + }); + return __result + __n1 + __n2; + } + + const auto __m1 = __left_bound_seq_1 - __first1; + if (__m1 > __set_algo_cut_off) + { + auto __res_or = __result; + __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) + __par_backend::__parallel_invoke( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + //do parallel copying of [first1; left_bound_seq_1) + [=] { + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __left_bound_seq_1, __res_or, __copy_range); + }, + [=, &__result] { + __result = __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, + __last2, __result, __comp, [](_DifferenceType __n, 
_DifferenceType __m) { return __n + __m; }, + __set_union_op); + }); + return __result; + } + + const auto __m2 = __left_bound_seq_2 - __first2; + assert(__m1 == 0 || __m2 == 0); + if (__m2 > __set_algo_cut_off) + { + auto __res_or = __result; + __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) + __par_backend::__parallel_invoke( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + //do parallel copying of [first2; left_bound_seq_2) + [=] { + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __left_bound_seq_2, __res_or, __copy_range); + }, + [=, &__result] { + __result = __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, + __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); + }); + return __result; + } + + return __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); +} + //------------------------------------------------------------------------ // set_union //------------------------------------------------------------------------ From 16f7eb5b814996c8e01ee90e57ecaac875f3696f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 12:31:11 +0100 Subject: [PATCH 218/566] __internal::__parallel_set_union_op + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 796c743056c..92ec5572b52 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -5637,6 +5637,9 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 
__first1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _OutputIterator __result, _Compare __comp, _IsVector __is_vector, /*__is_parallel=*/::std::true_type) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _OutputIterator>(); const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -5647,13 +5650,13 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; return __parallel_set_union_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); } template __tag, _ExecutionPolicy&& __exec, typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; return __parallel_set_union_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - _IsVector{}); + }); } //------------------------------------------------------------------------ @@ -6071,6 +6074,9 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessItera _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { 
+ constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3>(); const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -6081,13 +6087,13 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessItera typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; return __internal::__parallel_set_union_op( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_symmetric_difference_construct( __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); } template __tag, _ExecutionPo typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; return __internal::__parallel_set_union_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_symmetric_difference_construct( __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - _IsVector{}); + }); } //------------------------------------------------------------------------ From 9d151e62e2cf812bac84150df9a79d0254737cab Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 14:05:24 +0100 Subject: [PATCH 219/566] __pattern_search + tag impls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index c6e23ae6808..43a2d40c716 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1422,7 +1422,7 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las template _Iterator1 -__pattern_search(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, +__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__s_last == __s_first) From 94bcc7c0e5fa93e444e3f39ae28bde428ce39181 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 13:55:57 +0100 Subject: [PATCH 220/566] Move declarations of __serial_backend_tag, __tbb_backend_tag and __omp_backend_tag --- include/oneapi/dpl/pstl/execution_defs.h | 16 ++++++++++++++++ include/oneapi/dpl/pstl/execution_impl.h | 12 ------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_defs.h b/include/oneapi/dpl/pstl/execution_defs.h index d16a030b216..5001ec9fd0f 100644 --- a/include/oneapi/dpl/pstl/execution_defs.h +++ b/include/oneapi/dpl/pstl/execution_defs.h @@ -213,6 +213,22 @@ __check_size(...) 
-> typename ::std::iterator_traits<_It>::difference_type; template using __difference_t = ::std::make_signed_t(0))>; +//------------------------------------------------------------------------ +// backend tags +//------------------------------------------------------------------------ + +struct __serial_backend_tag +{ +}; + +struct __tbb_backend_tag +{ +}; + +struct __omp_backend_tag +{ +}; + } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 33ada982ff7..dae807121cf 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -97,18 +97,6 @@ __is_parallelization_preferred() // backend selector with tags //------------------------------------------------------------------------ -struct __serial_backend_tag -{ -}; - -struct __tbb_backend_tag -{ -}; - -struct __omp_backend_tag -{ -}; - #if _ONEDPL_PAR_BACKEND_TBB using __par_backend_tag = __tbb_backend_tag; #elif _ONEDPL_PAR_BACKEND_OPENMP From cdc3cb91dcc09eac755b4dabb999c12ca9591e29 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 14:42:52 +0100 Subject: [PATCH 221/566] include/oneapi/dpl/pstl/parallel_backend.h - fix compile error --- include/oneapi/dpl/pstl/parallel_backend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/parallel_backend.h b/include/oneapi/dpl/pstl/parallel_backend.h index 1e78d1f635b..b243e8fb492 100644 --- a/include/oneapi/dpl/pstl/parallel_backend.h +++ b/include/oneapi/dpl/pstl/parallel_backend.h @@ -18,14 +18,14 @@ // Select a parallel backend #if ONEDPL_USE_TBB_BACKEND || (!defined(ONEDPL_USE_TBB_BACKEND) && !ONEDPL_USE_OPENMP_BACKEND && _ONEDPL_TBB_AVAILABLE) -# include "parallel_backend_tbb.h" # define _ONEDPL_PAR_BACKEND_TBB 1 +# include "parallel_backend_tbb.h" #elif ONEDPL_USE_OPENMP_BACKEND || (!defined(ONEDPL_USE_OPENMP_BACKEND) && _ONEDPL_OPENMP_AVAILABLE) -# include 
"parallel_backend_omp.h" # define _ONEDPL_PAR_BACKEND_OPENMP 1 +# include "parallel_backend_omp.h" #else -# include "parallel_backend_serial.h" # define _ONEDPL_PAR_BACKEND_SERIAL 1 +# include "parallel_backend_serial.h" #endif #if _ONEDPL_BACKEND_SYCL From f564fd904f62cc16e1ec5459588bf796a7d0fc02 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 15:07:48 +0100 Subject: [PATCH 222/566] __pattern_is_heap_until + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 92ec5572b52..9b85f235ddb 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -6206,7 +6206,7 @@ __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first template _RandomAccessIterator -__pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_is_heap_until(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { return __internal::__except_handler([&]() { From 347751509e2aae1bfc23ef51341f99097c4fe4b8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 16:16:58 +0100 Subject: [PATCH 223/566] Apply GitHUB clang format --- include/oneapi/dpl/pstl/algorithm_impl.h | 359 ++++++++---------- .../experimental/internal/for_loop_impl.h | 24 +- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 42 +- include/oneapi/dpl/pstl/glue_memory_impl.h | 20 +- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 93 ++--- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 22 +- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 35 +- .../dpl/pstl/hetero/numeric_impl_hetero.h | 10 +- include/oneapi/dpl/pstl/histogram_impl.h | 3 +- include/oneapi/dpl/pstl/numeric_impl.h | 12 +- include/oneapi/dpl/pstl/omp/parallel_merge.h | 6 +- 
.../dpl/pstl/omp/parallel_stable_sort.h | 1 - .../oneapi/dpl/pstl/parallel_backend_tbb.h | 6 +- 13 files changed, 283 insertions(+), 350 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 9b85f235ddb..8790cc64f25 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -194,8 +194,8 @@ __pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardItera template void -__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, - _ForwardIterator __last, _Function __f) +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) { using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; @@ -348,7 +348,8 @@ __pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Siz _IsVector __is_vector, /*is_parallel=*/::std::true_type) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); @@ -360,8 +361,8 @@ _RandomAccessIterator __pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Function __f) { - oneapi::dpl::__internal::__pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, - __first + __n, __f); + oneapi::dpl::__internal::__pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + __f); return __first + __n; } @@ -499,7 +500,8 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran }); } -template +template _RandomAccessIterator2 
__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f) @@ -597,7 +599,8 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Ra __pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); return __internal::__pattern_walk2(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f); @@ -649,8 +652,7 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1 return __except_handler([&]() { __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __brick(__i, __j, __first2 + (__i - __first1), __is_vector); }); @@ -749,8 +751,7 @@ __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __firs return __except_handler([&]() { __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __brick(__i, __j - __i, __first2 + (__i - __first1), __is_vector); }); @@ -758,7 +759,8 @@ __pattern_walk2_brick_n(_ExecutionPolicy&& __exec, 
_RandomAccessIterator1 __firs }); } -template +template _RandomAccessIterator2 __pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, _Brick __brick) @@ -767,8 +769,7 @@ __pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R return __except_handler([&]() { __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __brick(__i, __j - __i, __first2 + (__i - __first1), _IsVector{}); }); @@ -858,8 +859,7 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran return __internal::__except_handler([&]() { __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__f, __first1, __first2, __first3, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, __is_vector); @@ -871,15 +871,15 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran template _RandomAccessIterator3 -__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f) +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, + _Function __f) { using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; return __internal::__except_handler([&]() 
{ __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__f, __first1, __first2, __first3](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, _IsVector{}); @@ -969,8 +969,8 @@ __pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __firs template _ForwardIterator2 -__pattern_walk2_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func) noexcept +__pattern_walk2_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __func) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -1038,8 +1038,7 @@ __pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator template bool __pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _BinaryPredicate __p) noexcept + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -1081,8 +1080,7 @@ __pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ran return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), __p, _IsVector{}); @@ -1090,7 +1088,6 @@ __pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, 
_Ran }); } - //------------------------------------------------------------------------ // equal version for sequences with equal length //------------------------------------------------------------------------ @@ -1143,8 +1140,7 @@ __pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector); }); @@ -1343,15 +1339,14 @@ __pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterat template _ForwardIterator1 -__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, - _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept { static_assert(__is_backend_tag_v<_Tag>); return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); } - template oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> @@ -1388,13 +1383,13 @@ template _RandomAccessIterator1 __pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, - _RandomAccessIterator1 __last, - _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) { if (__last - __first == __s_last - 
__s_first) { - const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), - __first, __last, __s_first, __pred); + const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); return __res ? __first : __last; } else @@ -1411,7 +1406,6 @@ __pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ } } - //------------------------------------------------------------------------ // find_first_of //------------------------------------------------------------------------ @@ -1686,7 +1680,6 @@ __pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ } } - //------------------------------------------------------------------------ // copy_n //------------------------------------------------------------------------ @@ -1992,8 +1985,7 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _Ra bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i, __pred, __is_vector) @@ -2030,8 +2022,7 @@ __pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i, __pred, _IsVector{}) 
@@ -2168,7 +2159,8 @@ __pattern_unique(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator template _ForwardIterator -__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) noexcept +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -2231,8 +2223,7 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI _DifferenceType __m{}; // 2. Elements that doesn't satisfy pred are moved to result __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_count( __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, __is_vector); @@ -2400,8 +2391,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce _DifferenceType __extra = 0; if (__i == 0) @@ -2452,8 +2442,7 @@ __pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce _DifferenceType __extra = 0; if (__i 
== 0) @@ -2728,7 +2717,8 @@ __pattern_rotate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator template _ForwardIterator -__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) noexcept +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -2920,8 +2910,7 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __last, __middle, __result, __is_vector](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { __internal::__brick_copy<_ExecutionPolicy> __copy{}; if (__b > __middle) @@ -2953,8 +2942,7 @@ __pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { __internal::__brick_copy<_ExecutionPolicy> __copy{}; if (__b > __middle) @@ -3027,7 +3015,8 @@ __pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardI template bool -__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) noexcept +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -3183,7 +3172,7 @@ 
__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ _ReduceType __result = __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, [&__pred, __combine](_RandomAccessIterator __i, _RandomAccessIterator __j, - _ReduceType __value) -> _ReduceType { + _ReduceType __value) -> _ReduceType { if (__value.__val == __broken) return _ReduceType{__broken, __i}; @@ -3312,8 +3301,7 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else if (__size2 > __size1) { __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, [__val1, __val2, __size1, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), __is_vector); @@ -3324,8 +3312,7 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else { __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, [__val1, __val2, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), __is_vector); }); @@ -3334,8 +3321,7 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R }; _PartitionRange __result = __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, 
_PartitionRange __value) -> _PartitionRange { //1. serial partition @@ -3381,12 +3367,13 @@ __pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando // then we should swap the false part of left range and last part of true part of right range else if (__size2 > __size1) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), decltype(__val1.__pivot + __size1)>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), + decltype(__val1.__pivot + __size1)>(); using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, [__val1, __val2, __size1](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), _IsVector{}); @@ -3396,12 +3383,13 @@ __pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando // else we should swap the first part of false part of left range and true part of right range else { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), decltype(__val1.__pivot + __size2)>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), + decltype(__val1.__pivot + __size2)>(); using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, [__val1, __val2](_RandomAccessIterator 
__i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), _IsVector{}); }); @@ -3510,8 +3498,7 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; _PartitionRange __result = __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [&__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, _PartitionRange __value) -> _PartitionRange { //1. serial stable_partition @@ -3653,8 +3640,8 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir bool* __mask = __mask_buf.get(); _ReturnType __m{}; __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i, __pred, __is_vector); @@ -3694,8 +3681,8 @@ __pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ bool* __mask = __mask_buf.get(); _ReturnType __m{}; __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i, __pred, _IsVector{}); @@ -3750,8 
+3737,7 @@ __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::sort(__first, __last, __comp); }, @@ -3769,8 +3755,7 @@ __pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcce __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::sort(__first, __last, __comp); }, @@ -3792,7 +3777,8 @@ __pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _Random template void -__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) noexcept +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -3810,8 +3796,7 @@ __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::stable_sort(__first, __last, __comp); }, @@ -3828,8 +3813,7 @@ __pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran __internal::__except_handler([&]() { 
__par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { ::std::stable_sort(__first, __last, __comp); }, @@ -3899,8 +3883,7 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); }); } @@ -3926,8 +3909,7 @@ __pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran __internal::__except_handler([&]() { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); }); } @@ -3970,8 +3952,7 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, __except_handler([&]() { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { if (__n < __end - __begin) ::std::partial_sort(__begin, __begin + __n, __end, __comp); @@ -3995,8 +3976,7 @@ __pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ra __except_handler([&]() { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), 
__first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { if (__n < __end - __begin) ::std::partial_sort(__begin, __begin + __n, __end, __comp); @@ -4052,8 +4032,7 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ if (__n2 >= __n1) { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, [__first, __d_first, __is_vector](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp) { _RandomAccessIterator1 __i1 = __first + (__i - __d_first); @@ -4075,8 +4054,7 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ _T1* __r = __buf.get(); __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { _RandomAccessIterator1 __it = __first + (__i - __r); @@ -4130,8 +4108,7 @@ __pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec if (__n2 >= __n1) { __par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, [__first, __d_first](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp) { _RandomAccessIterator1 __i1 = __first + (__i - __d_first); _RandomAccessIterator1 __j1 = __first + (__j - __d_first); @@ -4152,8 +4129,7 @@ __pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec _T1* __r = __buf.get(); 
__par_backend::__parallel_stable_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { _RandomAccessIterator1 __it = __first + (__i - __r); @@ -4240,8 +4216,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, [__last, __pred, __is_vector, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __value) -> _RandomAccessIterator { // TODO: investigate performance benefits from the use of shared variable for the result, @@ -4289,8 +4264,7 @@ __pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, [__last, __pred, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __value) -> _RandomAccessIterator { // TODO: investigate performance benefits from the use of shared variable for the result, @@ -4358,7 +4332,8 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, return; } - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); using ::std::iter_swap; typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type 
_Tp; @@ -4487,8 +4462,7 @@ __pattern_fill(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random return __internal::__except_handler([&__exec, __first, __last, &__value, __is_vector]() { __par_backend::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [&__value, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__begin, __end, __is_vector); }); @@ -4558,7 +4532,8 @@ _RandomAccessIterator __pattern_fill_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, const _Tp& __value, /*is_parallel=*/::std::true_type, _IsVector /*__is_vector*/) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); return __internal::__pattern_fill(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __value); @@ -4820,8 +4795,9 @@ __pattern_merge(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator template _OutputIterator -__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp) noexcept +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __d_first, + _Compare __comp) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -4842,8 +4818,8 @@ __pattern_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Ran using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; __par_backend::__parallel_merge( - __backend_tag{}, - 
::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, + __comp, [__is_vector](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, __is_vector); @@ -4861,8 +4837,8 @@ __pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcc using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; __par_backend::__parallel_merge( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, + __comp, [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, _IsVector{}); }); @@ -4940,8 +4916,7 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first }; __par_backend::__parallel_merge( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, _Compare __comp) { @@ -4986,8 +4961,7 @@ __pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R }; __par_backend::__parallel_merge( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, + 
__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, _Compare __comp) { @@ -4997,7 +4971,8 @@ __pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R }); __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, [__r, __first](_Tp* __i, _Tp* __j) { - __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), _IsVector{}); + __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), + _IsVector{}); }); }); } @@ -5055,8 +5030,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { assert(__j > __i); //assert(__j - __i > 1); @@ -5091,7 +5065,8 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ }); } -template +template bool __pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, @@ -5117,8 +5092,7 @@ __pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ return __internal::__except_handler([&]() { return !__internal::__parallel_or( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { 
assert(__j > __i); //assert(__j - __i > 1); @@ -5195,9 +5169,8 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar __result + __s.__pos, __is_vector); }; __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, - [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce //[__b; __e) - a subrange of the first sequence, to reduce _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); @@ -5251,13 +5224,11 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar } template + class _OutputIterator, class _Compare, class _SizeFunction, class _SetOP> _OutputIterator __parallel_set_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, - _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _SizeFunction __size_func, _SetOP __set_op) + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op) { using __backend_tag = typename decltype(__tag)::__backend_tag; @@ -5279,8 +5250,8 @@ __parallel_set_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _F __par_backend::__buffer<_ExecutionPolicy, _T> __buf(__size_func(__n1, __n2)); - return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, - __comp, __size_func, __set_op, &__buf]() { + return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __comp, + __size_func, __set_op, &__buf]() { auto __tmp_memory = __buf.get(); _DifferenceType __m{}; auto __scan = [=](_DifferenceType, _DifferenceType, const 
_SetRange& __s) { // Scan @@ -5354,7 +5325,8 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ { typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _OutputIterator>(); using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; const auto __n1 = __last1 - __first1; @@ -5424,9 +5396,9 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ }, [=, &__result] { __result = __internal::__parallel_set_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, + __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); }); return __result; } @@ -5446,17 +5418,16 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ }, [=, &__result] { __result = __internal::__parallel_set_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, + __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); }); return __result; } return __internal::__parallel_set_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, 
__first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); } //a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' @@ -5478,13 +5449,13 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex // {1} {}: parallel copying just first sequence if (__n2 == 0) - return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result, __copy_range); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __copy_range); // {} {2}: parallel copying justmake second sequence if (__n1 == 0) - return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result, __copy_range); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, __copy_range); // testing whether the sequences are intersected _ForwardIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); @@ -5495,12 +5466,12 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex __par_backend::__parallel_invoke( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { - __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result, __copy_range); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result + __n1, __copy_range); + 
__internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result + __n1, __copy_range); }); return __result + __n1 + __n2; } @@ -5514,12 +5485,12 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex __par_backend::__parallel_invoke( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { - __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result, __copy_range); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result + __n2, __copy_range); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result + __n2, __copy_range); }); return __result + __n1 + __n2; } @@ -5538,8 +5509,8 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex }, [=, &__result] { __result = __internal::__parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, - __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); }); return __result; @@ -5560,16 +5531,16 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex }, [=, &__result] { __result = __internal::__parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, - __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, + 
__result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); }); return __result; } return __internal::__parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); } //------------------------------------------------------------------------ @@ -5650,8 +5621,8 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; return __parallel_set_union_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, @@ -5675,8 +5646,7 @@ __pattern_set_union(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; return __parallel_set_union_op( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_union_construct(__first1, 
__last1, __first2, __last2, __result, __comp, @@ -5770,9 +5740,8 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f { //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) return __internal::__parallel_set_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, @@ -5785,9 +5754,8 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f { //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) __result = __internal::__parallel_set_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, @@ -5834,9 +5802,8 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ { //we know proper offset due to [first1; 
left_bound_seq_1) < [first2; last2) return __internal::__parallel_set_op( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, @@ -5849,9 +5816,8 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ { //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) __result = __internal::__parallel_set_op( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, @@ -5953,9 +5919,8 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType) { return __n; }, + __dispatch_tag, 
::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, @@ -5970,9 +5935,8 @@ template _RandomAccessIterator3 __pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _RandomAccessIterator3 __result, _Compare __comp) + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; @@ -6005,8 +5969,7 @@ __pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __e if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { @@ -6087,8 +6050,8 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessItera typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; return __internal::__parallel_set_union_op( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, 
__first2, __last2, __result, __comp, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_symmetric_difference_construct( @@ -6113,8 +6076,7 @@ __pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPo typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; return __internal::__parallel_set_union_op( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_symmetric_difference_construct( @@ -6219,7 +6181,6 @@ __pattern_is_heap_until(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex }); } - //------------------------------------------------------------------------ // is_heap //------------------------------------------------------------------------ @@ -6271,7 +6232,8 @@ __pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAcce template bool -__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) noexcept +__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -6343,7 +6305,8 @@ __pattern_min_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIter template _ForwardIterator -__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare 
__comp) noexcept +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept { static_assert(__is_backend_tag_v<_Tag>); @@ -6365,8 +6328,7 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __init) -> _RandomAccessIterator { const _RandomAccessIterator __subresult = @@ -6399,8 +6361,7 @@ __pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __init) -> _RandomAccessIterator { const _RandomAccessIterator __subresult = @@ -6481,8 +6442,7 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ ::std::make_pair(__last, __last), [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, __is_vector); @@ -6525,8 +6485,7 @@ __pattern_minmax_element(__parallel_tag<_IsVector>, 
_ExecutionPolicy&& __exec, _ typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ ::std::make_pair(__last, __last), [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, _IsVector{}); diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index 35e7433a09f..e86beeb276d 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -262,7 +262,8 @@ inline constexpr bool __is_random_access_or_integral_v = __is_random_access_or_i // Vectorized version of for_loop template void -__pattern_for_loop(_Tag __tag, _ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function __f, _Sp __stride, _Rest&&... __rest) noexcept +__pattern_for_loop(_Tag __tag, _ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function __f, _Sp __stride, + _Rest&&... __rest) noexcept { static_assert(__is_backend_tag_serial_v<_Tag>); @@ -362,7 +363,8 @@ __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pa // Sequenced version of for_loop for non-RAI and non-integral types template ::std::enable_if_t> -__pattern_for_loop(_Tag, _ExecutionPolicy&&, _Ip __first, _Ip __last, _Function __f, __single_stride_type, _Rest&&... __rest) noexcept +__pattern_for_loop(_Tag, _ExecutionPolicy&&, _Ip __first, _Ip __last, _Function __f, __single_stride_type, + _Rest&&... 
__rest) noexcept { static_assert(__is_backend_tag_serial_v<_Tag>); @@ -400,8 +402,7 @@ __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, oneapi::dpl::__internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, [__first, __f](_Size __i, _Size __j, __pack_type __value) { const auto __subseq_start = __first + __i; const auto __length = __j - __i; @@ -438,8 +439,7 @@ __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, oneapi::dpl::__internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, [__first, __f, __stride](_Size __i, _Size __j, __pack_type __value) { const auto __subseq_start = __first + __i * __stride; const auto __length = __j - __i; @@ -517,16 +517,14 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> // Special versions for for_loop: handles both iterators and integral types(treated as random access iterators) template constexpr auto -__use_vectorization() - -> decltype(__use_par_vec_helper<_Ip>::template __use_vector<_ExecutionPolicy>()) +__use_vectorization() -> decltype(__use_par_vec_helper<_Ip>::template __use_vector<_ExecutionPolicy>()) { return __use_par_vec_helper<_Ip>::template __use_vector<_ExecutionPolicy>(); } template constexpr auto -__use_parallelization() - -> decltype(__use_par_vec_helper<_Ip>::template __use_parallel<_ExecutionPolicy>()) +__use_parallelization() -> decltype(__use_par_vec_helper<_Ip>::template __use_parallel<_ExecutionPolicy>()) { return __use_par_vec_helper<_Ip>::template __use_parallel<_ExecutionPolicy>(); } @@ -547,9 +545,9 @@ __for_loop_impl(_ExecutionPolicy&& __exec, _Ip 
__start, _Ip __finish, _Fp&& __f, } else { - oneapi::dpl::__internal::__pattern_for_loop(__serial_tag<_IsVector>{}, - ::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __f, - __stride, ::std::get<_Is>(::std::move(__t))...); + oneapi::dpl::__internal::__pattern_for_loop(__serial_tag<_IsVector>{}, ::std::forward<_ExecutionPolicy>(__exec), + __start, __finish, __f, __stride, + ::std::get<_Is>(::std::move(__t))...); } } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 19b0208acca..495ff06be8c 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -187,10 +187,9 @@ adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardItera { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - return oneapi::dpl::__internal::__pattern_adjacent_find( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__first_semantic()); + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred, + oneapi::dpl::__internal::__first_semantic()); } // [alg.count] @@ -271,7 +270,8 @@ template copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); return oneapi::dpl::__internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, @@ -284,11 +284,11 @@ copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _Forward { using _DecayedExecutionPolicy = 
::std::decay_t<_ExecutionPolicy>; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); return oneapi::dpl::__internal::__pattern_walk2_brick_n( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}); } @@ -331,11 +331,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); return oneapi::dpl::__internal::__pattern_walk2( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); } @@ -346,11 +346,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator __result, _BinaryOperation __op) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _ForwardIterator>(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, 
_ForwardIterator>(); return oneapi::dpl::__internal::__pattern_walk3( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); } @@ -416,7 +416,8 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); return oneapi::dpl::__internal::__pattern_walk2( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, @@ -681,10 +682,8 @@ sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIter typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _InputType; - oneapi::dpl::__internal::__pattern_sort( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - typename ::std::is_move_constructible<_InputType>::type()); + oneapi::dpl::__internal::__pattern_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __comp, typename ::std::is_move_constructible<_InputType>::type()); } template @@ -793,7 +792,7 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __p) { - // TODO is it correct that we check _ForwardIterator2 in __select_backend ? 
+ // TODO is it correct that we check _ForwardIterator2 in __select_backend ? constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); @@ -838,7 +837,8 @@ move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __l { using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); return oneapi::dpl::__internal::__pattern_walk2_brick( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index cc46b189b0f..9058b0dc07c 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -45,7 +45,8 @@ uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -69,7 +70,8 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, 
_ForwardIterator>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -95,7 +97,8 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -119,7 +122,8 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _InputIterator, _ForwardIterator>(); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { @@ -149,15 +153,13 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward if constexpr (::std::is_arithmetic_v<_ValueType>) { oneapi::dpl::__internal::__pattern_walk_brick( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}); } else { oneapi::dpl::__internal::__pattern_walk1( - __dispatch_tag, - 
::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); } } @@ -224,7 +226,7 @@ destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __ #endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicyDest, _ForwardIterator>(); + oneapi::dpl::__internal::__select_backend<_ExecutionPolicyDest, _ForwardIterator>(); oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [](_ReferenceType __val) { __val.~_ValueType(); }); diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 43a2d40c716..cd2bebea667 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -205,8 +205,7 @@ __pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIt using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - return __pattern_walk2<__backend_tag, /*_IsSync=*/::std::true_type, - __par_backend_hetero::access_mode::read_write, + return __pattern_walk2<__backend_tag, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read_write, __par_backend_hetero::access_mode::read_write>( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); } @@ -259,8 +258,7 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI return __first3 + __n; } -template __tag, _ExecutionPolicy&& { // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. 
- return __pattern_walk2<_BackendTag, /*_IsSync=*/::std::true_type, - __par_backend_hetero::access_mode::read, + return __pattern_walk2<_BackendTag, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read_write>( __tag, __par_backend_hetero::make_wrapped_policy<__walk2_transform_if_wrapper>( @@ -563,7 +560,6 @@ __pattern_fill_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Fo __count, fill_functor<_T>{__value}); } - //------------------------------------------------------------------------ // generate //------------------------------------------------------------------------ @@ -976,9 +972,8 @@ __pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() bool result = __par_backend_hetero::__parallel_find_or( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, - __par_backend_hetero::__parallel_or_tag{}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); // inverted conditional because of @@ -1021,8 +1016,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator template _Iterator __pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - _BinaryPredicate __predicate, - oneapi::dpl::__internal::__first_semantic) + _BinaryPredicate __predicate, oneapi::dpl::__internal::__first_semantic) { if (__last - __first < 2) return __last; @@ -1182,8 +1176,7 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las // TODO: in case of confilicting names // 
__par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !__par_backend_hetero::__parallel_find_or( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); } @@ -1206,13 +1199,11 @@ __pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !__par_backend_hetero::__parallel_find_or( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); } - //------------------------------------------------------------------------ // equal version for sequences with equal length //------------------------------------------------------------------------ @@ -1375,7 +1366,6 @@ __pattern_find_first_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex ::std::true_type{}); } - //------------------------------------------------------------------------ // search //------------------------------------------------------------------------ @@ -1607,8 +1597,8 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __ auto __buf2 = __keep2(__output_first, __output_first + __n); auto __res = __par_backend_hetero::__parallel_scan_copy(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), - __buf2.all_view(), __n, __create_mask_op, __copy_by_mask_op); + __buf1.all_view(), __buf2.all_view(), __n, 
__create_mask_op, + __copy_by_mask_op); ::std::size_t __num_copied = __res.get(); return ::std::make_pair(__output_first + __n, __num_copied); @@ -2064,8 +2054,7 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return !__par_backend_hetero::__parallel_or( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); } @@ -2082,8 +2071,7 @@ __pattern_is_heap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return !__par_backend_hetero::__parallel_or( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); } @@ -2103,7 +2091,8 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las if (__n == 0) return __d_first; - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2, _Iterator3>(); + const auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2, _Iterator3>(); using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; //To consider the direct copying pattern call in case just one of sequences is empty. 
@@ -2129,8 +2118,8 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); auto __buf3 = __keep3(__d_first, __d_first + __n); - __par_backend_hetero::__parallel_merge(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __buf3.all_view(), __comp) + __par_backend_hetero::__parallel_merge(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __buf3.all_view(), __comp) .wait(); } return __d_first + __n; @@ -2171,8 +2160,8 @@ __pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); auto __buf3 = __keep3(__d_first, __d_first + __n); - __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __buf3.all_view(), __comp) + __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __buf3.all_view(), __comp) .wait(); } return __d_first + __n; @@ -2438,7 +2427,8 @@ __pattern_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __la constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); //TODO: consider nonstable approaches - return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); + return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __pred); } template @@ -2449,7 +2439,8 @@ __pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); //TODO: consider 
nonstable approaches - return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); + return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __pred); } //------------------------------------------------------------------------ @@ -2577,8 +2568,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa using __brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; return !__par_backend_hetero::__parallel_or( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), @@ -2606,8 +2596,7 @@ __pattern_includes(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ using __brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; return !__par_backend_hetero::__parallel_or( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), @@ -2630,8 +2619,7 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator _ using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; __par_backend_hetero::__parallel_partial_sort( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), 
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__last), __comp) @@ -2647,8 +2635,7 @@ __pattern_partial_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exe return; __par_backend_hetero::__parallel_partial_sort( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__last), __comp) @@ -2753,8 +2740,7 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI using __backend_tag11 = typename decltype(__dispatch_tag11)::__backend_tag; __par_backend_hetero::__parallel_partial_sort( - __backend_tag11{}, - __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), + __backend_tag11{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); @@ -2799,7 +2785,8 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__out_first), decltype(__out_end)>(); - __pattern_sort(__dispatch_tag, + __pattern_sort( + __dispatch_tag, __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), __out_first, __out_end, 
__comp, ::std::true_type{}); @@ -2832,8 +2819,7 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& using __backend_tag11 = typename decltype(__dispatch_tag11)::__backend_tag; __par_backend_hetero::__parallel_partial_sort( - __backend_tag11{}, - __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), + __backend_tag11{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); @@ -2866,7 +2852,8 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ // TODO: check partition-based implementation // - try to avoid host dereference issue // - measure performance of the issue-free implementation - __pattern_partial_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp); + __pattern_partial_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, + __comp); } template @@ -3060,8 +3047,8 @@ __pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator using _Function = __brick_move<_ExecutionPolicy>; auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), - __brick, __n, __temp_rng, __buf.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __brick, + __n, __temp_rng, __buf.all_view()) .wait(); return __first + (__last - __new_first); @@ -3094,8 +3081,7 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, const auto __shift = __new_first - __first; 
oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, __buf1.all_view(), __buf2.all_view()) @@ -3313,7 +3299,8 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, // {1} \ {}: the difference is {1} if (__first2 == __last2) { - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _OutputIterator>(); return oneapi::dpl::__internal::__pattern_walk2_brick( __dispatch_tag, @@ -3372,7 +3359,8 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw if (__first1 == __last1 && __first2 == __last2) return __result; - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _OutputIterator>(); //{1} is empty if (__first1 == __last1) @@ -3509,7 +3497,8 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 if (__first1 == __last1 && __first2 == __last2) return __result; - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, _OutputIterator>(); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _OutputIterator>(); //{1} is empty if (__first1 == __last1) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 22a07100d96..0187a91a2b8 100644 
--- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -848,8 +848,8 @@ template auto __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __in_rng, - _Range2&& __out_rng, ::std::size_t __n, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) + _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, + _BinaryOperation __binary_op, _Inclusive) { using _Type = typename _InitType::__value_type; @@ -1451,7 +1451,6 @@ __parallel_find_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPoli return __result != __init_value ? __result : __rng_n; } - //------------------------------------------------------------------------ // parallel_or - sync pattern //------------------------------------------------------------------------ @@ -1584,7 +1583,6 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy& __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); } - // Special overload for single sequence cases. // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. 
@@ -2184,10 +2182,10 @@ __parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare, _Pro ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); } -template , _Compare>::value, - int> = 0> +template < + typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, + ::std::enable_if_t< + __is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare, _Proj __proj) @@ -2211,10 +2209,10 @@ __parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __com return __parallel_sort_impl(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); } -template , _Compare>::value, - int> = 0> +template < + typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, + ::std::enable_if_t< + !__is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index b6950c6f75c..4d2568f72c1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -169,9 +169,8 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, - __unary_op, __init, __binary_op, _Inclusive{}); + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), 
::std::forward<_Range1>(__in_rng), + ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); } template (__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __pred); + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), + ::std::forward<_OutRng>(__out_rng), __n, __pred); } template @@ -219,8 +218,8 @@ __parallel_copy_if(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_copy_if( - oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __pred); + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), + ::std::forward<_OutRng>(__out_rng), __n, __pred); } template (__in_rng), ::std::forward<_OutRng>(__out_rng), __n, - __create_mask_op, __copy_by_mask_op); + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), + ::std::forward<_OutRng>(__out_rng), __n, __create_mask_op, __copy_by_mask_op); } template auto __parallel_scan_copy(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, - _OutRng&& __out_rng, _Size __n, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) + _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_scan_copy( - oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, - __create_mask_op, __copy_by_mask_op); + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), 
+ ::std::forward<_OutRng>(__out_rng), __n, __create_mask_op, __copy_by_mask_op); } //------------------------------------------------------------------------ @@ -268,9 +264,9 @@ __parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, } template -::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t< - typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>> +::std::conditional_t< + ::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, + oneapi::dpl::__internal::__difference_t::type>> __parallel_find_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... __rngs) { @@ -340,8 +336,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last template _Iterator1 __parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, - _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) + _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 16521a0835b..dde4eb10f58 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -320,12 +320,11 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex return __result + __n; } template + typename _UnaryOperation, typename _Type, typename _BinaryOperation, typename _Inclusive> _Iterator2 __pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, - _Iterator1 __last, 
_Iterator2 __result, - _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _Type __init, + _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; @@ -385,7 +384,8 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir if (__n <= 0) return __d_first; - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + const auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; using _It1ValueT = typename ::std::iterator_traits<_ForwardIterator1>::value_type; diff --git a/include/oneapi/dpl/pstl/histogram_impl.h b/include/oneapi/dpl/pstl/histogram_impl.h index a67455df13e..b9fe40422f9 100644 --- a/include/oneapi/dpl/pstl/histogram_impl.h +++ b/include/oneapi/dpl/pstl/histogram_impl.h @@ -56,8 +56,7 @@ histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 first, _RandomAccessIt oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); oneapi::dpl::__internal::__pattern_histogram( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, oneapi::dpl::__internal::__evenly_divided_binhash<_ValueType>(first_bin_min_val, last_bin_max_val, num_bins), histogram_first); return histogram_first + num_bins; diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 7a1c05f7af7..6c099bb4689 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -382,8 +382,7 @@ 
__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __e __unary_op, /*__is_vector*/ ::std::false_type()); }, - [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, - _Tp __init) { + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, _Tp __init) { return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, __init, __binary_op, _Inclusive(), _IsVector{}) .second; @@ -414,8 +413,7 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs return __internal::__except_handler([&]() { __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __init, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector) @@ -454,8 +452,7 @@ __pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ return __internal::__except_handler([&]() { __par_backend::__parallel_strict_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __init, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{}) @@ -493,8 +490,7 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _F oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator, _OutputIterator>(); return __pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, - ++__result, - 
__unary_op, __tmp, __binary_op, _Inclusive()); + ++__result, __unary_op, __tmp, __binary_op, _Inclusive()); } else { diff --git a/include/oneapi/dpl/pstl/omp/parallel_merge.h b/include/oneapi/dpl/pstl/omp/parallel_merge.h index a708a502bd4..39e8606d4ea 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_merge.h +++ b/include/oneapi/dpl/pstl/omp/parallel_merge.h @@ -102,9 +102,9 @@ __parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _Ra template void -__parallel_merge(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, - _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, - _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) +__parallel_merge(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) { std::size_t __size_x = __xe - __xs; std::size_t __size_y = __ye - __ys; diff --git a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h index 5c05452a428..2c5c760f584 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h +++ b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h @@ -204,7 +204,6 @@ __parallel_stable_sort(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPol } } - } // namespace __omp_backend } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 39caf6cbc21..7639551a1c6 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -518,7 +518,6 @@ __parallel_transform_scan(oneapi::dpl::__internal::__tbb_backend_tag, _Execution return __body.sum(); } - 
//------------------------------------------------------------------------ // parallel_stable_sort //------------------------------------------------------------------------ @@ -1420,9 +1419,8 @@ template void __parallel_merge(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __xs, - _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) { typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; From 56406dfe64df2f20ce038f78d9032bb952728a07 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 17:05:00 +0100 Subject: [PATCH 224/566] Fix review comment: I would propose to add a parameter _BackendType to this template definition... 
--- include/oneapi/dpl/pstl/algorithm_fwd.h | 3 ++- include/oneapi/dpl/pstl/execution_impl.h | 7 ++++--- include/oneapi/dpl/pstl/numeric_fwd.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index ec2cd79a135..e456f7a5093 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -26,9 +26,10 @@ namespace dpl namespace __internal { -template +template struct __parallel_tag; +template struct __parallel_forward_tag; //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index dae807121cf..cb2b71225b1 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -113,21 +113,22 @@ struct __serial_tag using __is_vector = _IsVector; }; -template +template struct __parallel_tag { using __is_vector = _IsVector; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = __par_backend_tag; + using __backend_tag = _BackendType; }; +template struct __parallel_forward_tag { using __is_vector = ::std::false_type; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = __par_backend_tag; + using __backend_tag = _BackendType; }; template diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index fdd4928c230..0e8ba2a96e7 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -25,7 +25,7 @@ namespace dpl { namespace __internal { -template +template struct __parallel_tag; //------------------------------------------------------------------------ From ae2208d6364bd3bba343bc467be0781298ef8fac Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 09:53:27 +0100 Subject: [PATCH 225/566] 
__parallel_transform_scan + tag calls --- include/oneapi/dpl/internal/async_impl/async_impl_hetero.h | 1 + include/oneapi/dpl/pstl/numeric_impl.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index 232957a9b4a..d14f2aed022 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -225,6 +225,7 @@ __pattern_transform_scan_base_async(_ExecutionPolicy&& __exec, _Iterator1 __firs auto __buf2 = __keep2(__result, __result + __n); auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( + oneapi::dpl::__internal::__fpga_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, __init, __binary_op, _Inclusive{}); return __res.__make_future(__result + __n); diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 6c099bb4689..216f313ffbc 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -343,6 +343,7 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs return __internal::__except_handler([&]() { __par_backend::__parallel_transform_scan( + oneapi::dpl::__internal::__serial_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __last - __first, [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, __binary_op, @@ -373,6 +374,7 @@ __pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __e return __internal::__except_handler([&]() { __par_backend::__parallel_transform_scan( + oneapi::dpl::__internal::__serial_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __last - __first, [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, __binary_op, From 
4937245cc45553021f66f6461b8a94ac2ed2bdc6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:35:29 +0100 Subject: [PATCH 226/566] __internal::__stable_sort_with_projection + tag impls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index cd2bebea667..65471caa67d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2233,10 +2233,10 @@ __pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ -template +template void -__stable_sort_with_projection(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - _Proj __proj) +__stable_sort_with_projection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Compare __comp, _Proj __proj) { if (__last - __first < 2) return; From 33c9cf63f1418bd3360112ce3532462a4c422b7e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:36:20 +0100 Subject: [PATCH 227/566] __internal::__stable_sort_with_projection + tag calls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 65471caa67d..623087f919e 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2266,7 +2266,7 @@ void __pattern_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, 
/*is_move_constructible=*/::std::true_type) { - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::identity{}); } @@ -2287,7 +2287,7 @@ void __pattern_stable_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp) { - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::identity{}); } @@ -2318,7 +2318,7 @@ __pattern_sort_by_key(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, [](const auto& __a) { return ::std::get<0>(__a); }); } From 5b3988096f14eec13517f46d4ad304eb45e3f6e7 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:45:38 +0100 Subject: [PATCH 228/566] __parallel_stable_sort + tag calls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 623087f919e..7e2cd9e51a2 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2244,10 +2244,7 @@ __stable_sort_with_projection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = 
__keep(__first, __last); - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __par_backend_hetero::__parallel_stable_sort(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::__parallel_stable_sort(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __comp, __proj) .wait(); } From 73e913a7978cfdb49df3aab4413d6b4ef4d143b6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:50:29 +0100 Subject: [PATCH 229/566] __pattern_sort + tag calls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 7e2cd9e51a2..d4726b3cf29 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -2779,11 +2779,8 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __brick_copy<_ExecutionPolicy>{}); // Use regular sort as partial_sort isn't required to be stable - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__out_first), decltype(__out_end)>(); - __pattern_sort( - __dispatch_tag, + __tag, __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), __out_first, __out_end, __comp, ::std::true_type{}); From 2a2736ede8d279cedaf46e0435a0f0656cfaa8b3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:53:17 +0100 Subject: [PATCH 230/566] __pattern_shift_left + tag calls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h 
b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index d4726b3cf29..4bbce19b233 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3678,8 +3678,8 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __l auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - auto __res = - oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __n); + auto __res = oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __n); return __first + __res; } @@ -3723,7 +3723,7 @@ __pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec //A shift right is the shift left with a reverse logic. auto __rng = oneapi::dpl::__ranges::reverse_view_simple{__buf.all_view()}; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __rng, __n); + auto __res = oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), __rng, __n); return __last - __res; } From 5e548881ec5ba8d9434455936c71b420104f0f04 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:55:44 +0100 Subject: [PATCH 231/566] __parallel_transform_scan_single_group + tag impls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 0187a91a2b8..43b6079a658 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -692,9 +692,10 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, 
_ElemsPerItem, _W template auto -__parallel_transform_scan_single_group(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, - ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) +__parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, ::std::size_t __n, + _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, + _Inclusive) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; From 6df1714c7d9a8d63942ff153807ae572c27ce572 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:55:56 +0100 Subject: [PATCH 232/566] __parallel_transform_scan_single_group + tag calls --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 43b6079a658..e7ba291b27d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -872,8 +872,9 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _Execut if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size) { return __parallel_transform_scan_single_group( - std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), - ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); + oneapi::dpl::__internal::__device_backend_tag{}, std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, + __binary_op, _Inclusive{}); } } From e2578c53a4cc60853b4067f75d5fc05f7040d18f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:59:21 +0100 
Subject: [PATCH 233/566] __parallel_transform_scan_base + tag impls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 9 ++++----- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index e7ba291b27d..838973de76a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -769,12 +769,11 @@ __parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend } template = 0> + typename _LocalScan, typename _GroupScan, typename _GlobalScan> auto -__parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, - _BinaryOperation __binary_op, _InitType __init, _LocalScan __local_scan, - _GroupScan __group_scan, _GlobalScan __global_scan) +__parallel_transform_scan_base(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __in_rng, _Range2&& __out_rng, _BinaryOperation __binary_op, _InitType __init, + _LocalScan __local_scan, _GroupScan __group_scan, _GlobalScan __global_scan) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index e5a24731ede..8f3f7debd7c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -187,12 +187,11 @@ __parallel_transform_scan(oneapi::dpl::__internal::__fpga_backend_tag, _Executio } template = 0> + typename _LocalScan, typename _GroupScan, typename _GlobalScan> auto -__parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, - _BinaryOperation __binary_op, 
_InitType __init, _LocalScan __local_scan, - _GroupScan __group_scan, _GlobalScan __global_scan) +__parallel_transform_scan_base(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, + _Range2&& __rng2, _BinaryOperation __binary_op, _InitType __init, + _LocalScan __local_scan, _GroupScan __group_scan, _GlobalScan __global_scan) { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( From 027a881b3c24981f0192a8905f5691e49c7fbd8d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:01:09 +0100 Subject: [PATCH 234/566] __parallel_transform_scan_base + tag calls --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 1 + include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 3 +++ .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 4bbce19b233..2b7108c7ba3 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -3236,6 +3236,7 @@ __pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _F auto __result_size = __par_backend_hetero::__parallel_transform_scan_base( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( __buf1.all_view(), __buf2.all_view(), diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 838973de76a..bde1afc65a8 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -889,6 +889,7 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _Execut return __future( 
__parallel_transform_scan_base( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __binary_op, __init, // local scan @@ -967,6 +968,7 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); return __parallel_transform_scan_base( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( ::std::forward<_InRng>(__in_rng), @@ -1007,6 +1009,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPo oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); return __parallel_transform_scan_base( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( ::std::forward<_InRng>(__in_rng), diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 8f3f7debd7c..de69c3db67d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -195,8 +195,8 @@ __parallel_transform_scan_base(oneapi::dpl::__internal::__fpga_backend_tag, _Exe { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __binary_op, __init, - __local_scan, __group_scan, __global_scan); + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__rng1), + ::std::forward<_Range2>(__rng2), __binary_op, __init, __local_scan, __group_scan, __global_scan); } template Date: 
Fri, 16 Feb 2024 11:13:38 +0100 Subject: [PATCH 235/566] __parallel_or + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index de69c3db67d..5e4b84c0888 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -278,9 +278,9 @@ __parallel_find_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy // parallel_or //----------------------------------------------------------------------- template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f) +bool +__parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __s_first, From 964d983ba44abe65de13424c39cdc2ff647182ab Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:14:16 +0100 Subject: [PATCH 236/566] __parallel_or + tag calls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 5e4b84c0888..fd4a9f7697a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -283,7 +283,8 @@ 
__parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __ _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) { // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __s_first, + return oneapi::dpl::__par_backend_hetero::__parallel_or(oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __first, __last, __s_first, __s_last, __f); } From 27f4a95ec1307279bef67209e44796cf6836bc48 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:30:06 +0100 Subject: [PATCH 237/566] __parallel_sort_impl + tag impls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index bde1afc65a8..79e30906ec8 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2035,10 +2035,10 @@ struct __parallel_sort_submitter<_IdType, __internal::__optional_kernel_name<_Le } }; -template = 0> +template auto -__parallel_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) +__parallel_sort_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; From bb8aa37ea9c4e169a436bfbaec196b9fce25527d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:30:26 +0100 Subject: [PATCH 238/566] __parallel_sort_impl + tag calls --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 79e30906ec8..7698ce0c85a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2224,7 +2224,8 @@ __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _Execution auto __cmp_f = [__comp, __proj](const auto& __a, const auto& __b) mutable { return __comp(__proj(__a), __proj(__b)); }; - return __parallel_sort_impl(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); + return __parallel_sort_impl(oneapi::dpl::__internal::__device_backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); } //------------------------------------------------------------------------ From 84d5ada9ecfccefc8b925ae652cfc9c2fcf157ca Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:32:32 +0100 Subject: [PATCH 239/566] __parallel_partial_sort_impl + tag impls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 7698ce0c85a..5027ebace7a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2142,10 +2142,10 @@ struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_Glo } }; -template = 0> +template auto -__parallel_partial_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) +__parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Merge __merge, _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _GlobalSortKernel = From 
0bb8b080666a45f0c6bafe9ee9732fb166dde5db Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:32:40 +0100 Subject: [PATCH 240/566] __parallel_partial_sort_impl + tag calls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 5027ebace7a..cbd63bc9259 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2246,7 +2246,8 @@ __parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __parallel_partial_sort_impl(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), + return __parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __partial_merge_kernel{__mid_idx}, __comp); } @@ -2263,7 +2264,8 @@ __parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag, _Executio auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __parallel_partial_sort_impl(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), + return __parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag{}, + ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __partial_merge_kernel{__mid_idx}, __comp); } } // namespace __par_backend_hetero From 4c6ffbff0a243ea2948cb81df6265c341bac0caa Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:37:57 +0100 Subject: [PATCH 241/566] __parallel_histogram + tag impls --- 
.../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 6 +++--- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index fd4a9f7697a..1f85484f809 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -452,10 +452,10 @@ __parallel_partial_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionP //----------------------------------------------------------------------- // TODO: check if it makes sense to move these wrappers out of backend to a common place -template = 0> +template auto -__parallel_histogram(_ExecutionPolicy&& __exec, const _Event& __init_event, _Range1&& __input, _Range2&& __bins, +__parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, const _Event& __init_event, + _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { if constexpr (sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t)) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index fdcf06ad984..ae569b96544 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -540,7 +540,8 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& template auto -__parallel_histogram(_ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, +__parallel_histogram(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& 
__binhash_manager) { if (__input.size() < 1048576) // 2^20 From 6138b635ea4f11d48b0d39f728370c56ee8ec4e1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:38:11 +0100 Subject: [PATCH 242/566] __parallel_histogram + tag calls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 1 + include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 1f85484f809..224e6b6f9fe 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -462,6 +462,7 @@ __parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPoli { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_histogram( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } diff --git a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h index 491bc107951..87d22e9a0a7 100644 --- a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h @@ -157,8 +157,8 @@ __pattern_histogram(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Rando _RandomAccessIterator1>(); auto __input_buf = __keep_input(__first, __last); - __parallel_histogram(::std::forward<_ExecutionPolicy>(__exec), __init_event, __input_buf.all_view(), - ::std::move(__bins), __binhash_manager) + __parallel_histogram(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __input_buf.all_view(), ::std::move(__bins), __binhash_manager) .wait(); } else From 
9dee971c9f51313e311cd0db07008d917591dcab Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:41:56 +0100 Subject: [PATCH 243/566] __histogram_general_registers_local_reduction + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index ae569b96544..e113ea38e8a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -285,9 +285,9 @@ struct __histogram_general_registers_local_reduction_submitter<__iters_per_work_ template <::std::uint16_t __iters_per_work_item, ::std::uint8_t __bins_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__histogram_general_registers_local_reduction(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __work_group_size, + _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; From 1f54f6d463f08de9b6b321aa6bf5684de0088df1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:42:05 +0100 Subject: [PATCH 244/566] __histogram_general_registers_local_reduction + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index e113ea38e8a..b90ab4a4015 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -513,6 +513,7 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& { return __future( __histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } From 6a31d18c6f9ab946da8a0899de2e35d2ac15e211 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:43:35 +0100 Subject: [PATCH 245/566] __histogram_general_local_atomics + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index b90ab4a4015..a85ef4f90ce 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -380,9 +380,9 @@ struct __histogram_general_local_atomics_submitter<__iters_per_work_item, template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__histogram_general_local_atomics(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __work_group_size, 
_Range1&& __input, + _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; From 59429cb063621dac7fdb78749dbdd551ee76b9c0 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:43:42 +0100 Subject: [PATCH 246/566] __histogram_general_local_atomics + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index a85ef4f90ce..65174273bb6 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -523,8 +523,8 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& __local_mem_size) { return __future(__histogram_general_local_atomics<__iters_per_work_item>( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), - ::std::forward<_Range2>(__bins), __binhash_manager)); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } else // otherwise, use global atomics (private copies per workgroup) { From 901d67bd4e39950bb2f2aa6e6eb94fdc16f02c45 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:44:40 +0100 Subject: [PATCH 247/566] __histogram_general_private_global_atomics + tag impls --- .../pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index 
65174273bb6..7c56570bc29 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -477,9 +477,10 @@ struct __histogram_general_private_global_atomics_submitter<__internal::__option }; template auto -__histogram_general_private_global_atomics(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __min_iters_per_work_item, ::std::uint16_t __work_group_size, - _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) +__histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __min_iters_per_work_item, + ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, + const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; From 9bc97a282017165b6fbad5079a1da5a27b2596e3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:44:47 +0100 Subject: [PATCH 248/566] __histogram_general_private_global_atomics + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index 7c56570bc29..119a5b49bed 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -535,8 +535,9 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& // private copies of the histogram bins in global memory. No unrolling is taken advantage of here because it // is a runtime argument. 
return __future(__histogram_general_private_global_atomics( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item, __work_group_size, - ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __iters_per_work_item, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), + __binhash_manager)); } } From 141a922594bed90857ba54a85c7d40f0f79398b2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:45:57 +0100 Subject: [PATCH 249/566] __parallel_histogram_select_kernel + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index 119a5b49bed..508fd1f9a18 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -495,8 +495,9 @@ __histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_bac template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, - _Range2&& __bins, const _BinHashMgr& __binhash_manager) +__parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, + const _BinHashMgr& __binhash_manager) { using _private_histogram_type = ::std::uint16_t; using _local_histogram_type = ::std::uint32_t; From fb5d2b88ca8506714748b4dd71b43ed20442cb0f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko 
<> Date: Fri, 16 Feb 2024 11:46:04 +0100 Subject: [PATCH 250/566] __parallel_histogram_select_kernel + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index 508fd1f9a18..8fbe0ec5af6 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -551,12 +551,14 @@ __parallel_histogram(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPo if (__input.size() < 1048576) // 2^20 { return __parallel_histogram_select_kernel( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } else { return __parallel_histogram_select_kernel( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } From f1aad06b0ad94fe4807fb17551f24a54ff8bc432 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:53:21 +0100 Subject: [PATCH 251/566] __parallel_radix_sort + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index 9b9f1bf9d5c..df262d54dfc 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -743,7 +743,8 @@ struct __parallel_radix_sort_iteration //----------------------------------------------------------------------- 
template auto -__parallel_radix_sort(_ExecutionPolicy&& __exec, _Range&& __in_rng, _Proj __proj) +__parallel_radix_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __in_rng, + _Proj __proj) { const ::std::size_t __n = __in_rng.size(); assert(__n > 1); From 9fd8b676876cd0f73b75c8009e1ba4607617ca71 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:53:28 +0100 Subject: [PATCH 252/566] __parallel_radix_sort + tag calls --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index cbd63bc9259..7ee5ba6fb9c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2195,6 +2195,7 @@ __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _Execution _Compare, _Proj __proj) { return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); } #endif From b126b3388786fdc69040206b1085afaed5c57922 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:55:24 +0100 Subject: [PATCH 253/566] __parallel_transform_reduce_small_impl + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 6eab6ac03ec..178a802e5ea 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -138,10 +138,11 @@ struct 
__parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite template = 0, typename... _Ranges> + typename... _Ranges> auto -__parallel_transform_reduce_small_impl(_ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) +__parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, + _InitType __init, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _ReduceKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< From 10da4e77e77025f48ccf4116c9ecea8c0e0013df Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:55:34 +0100 Subject: [PATCH 254/566] __parallel_transform_reduce_small_impl + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 178a802e5ea..d2ebe069483 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -532,36 +532,42 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _Exec if (__n <= 256) { return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 512) { return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, 
::std::forward<_Ranges>(__rngs)...); } else if (__n <= 1024) { return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 2048) { return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4096) { return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8192) { return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } From 53f0693780e72c73ce25486441dc59445f5fad06 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:57:10 +0100 Subject: [PATCH 255/566] __parallel_transform_reduce_mid_impl + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index d2ebe069483..7dcb52bd767 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -256,11 +256,11 @@ struct __parallel_transform_reduce_work_group_kernel_submitter< template = 0, typename... 
_Ranges> + typename _Size, typename _ReduceOp, typename _TransformOp, typename _InitType, typename... _Ranges> auto -__parallel_transform_reduce_mid_impl(_ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) +__parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, + _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; From 39b069615a97b002b7d2b29927000d01ea181f89 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:57:17 +0100 Subject: [PATCH 256/566] __parallel_transform_reduce_mid_impl + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 7dcb52bd767..f1f8c04cac2 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -578,36 +578,42 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _Exec else if (__n <= 2097152) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4194304) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8388608) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( 
+ oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 16777216) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 33554432) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 67108864) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } From 901d99085889be93d0005743cfa54251dbfe03ff Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 12:32:16 +0100 Subject: [PATCH 257/566] __internal::__remove_elements + tag impls --- include/oneapi/dpl/pstl/algorithm_impl.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 8790cc64f25..e9d589d2efe 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2169,13 +2169,12 @@ __pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIte // That function is shared between two algorithms - remove_if (__pattern_remove_if) and unique (pattern unique). But a mask calculation is different. // So, a caller passes _CalcMask brick into remove_elements. 
-template +template _ForwardIterator -__remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _CalcMask __calc_mask, - _IsVector __is_vector) +__remove_elements(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _CalcMask __calc_mask) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using __backend_tag = typename decltype(__tag)::__backend_tag; typedef typename ::std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _Tp; @@ -2186,7 +2185,7 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI bool* __mask = __mask_buf.get(); _DifferenceType __min = __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, - [__first, __mask, &__calc_mask, __is_vector](_DifferenceType __i, _DifferenceType __j, + [__first, __mask, &__calc_mask](_DifferenceType __i, _DifferenceType __j, _DifferenceType __local_min) -> _DifferenceType { // Create mask __calc_mask(__mask + __i, __mask + __j, __first + __i); @@ -2198,7 +2197,7 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI } // find first iterator that should be removed bool* __result = __internal::__brick_find_if( - __mask + __i, __mask + __j, [](bool __val) { return !__val; }, __is_vector); + __mask + __i, __mask + __j, [](bool __val) { return !__val; }, _IsVector{}); if (__result - __mask == __j) { return __local_min; @@ -2224,9 +2223,9 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI // 2. 
Elements that doesn't satisfy pred are moved to result __par_backend::__parallel_strict_scan( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), - [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) { + [__mask](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_count( - __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, __is_vector); + __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, _IsVector{}); }, ::std::plus<_DifferenceType>(), [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { From 8c60029e6355753989418ca64896211bdab71378 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 12:32:43 +0100 Subject: [PATCH 258/566] __internal::__remove_elements + tag calls --- include/oneapi/dpl/pstl/algorithm_impl.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index e9d589d2efe..8c4569daf09 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2279,7 +2279,7 @@ __pattern_unique(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand template _RandomAccessIterator -__pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred) { typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; @@ -2294,6 +2294,7 @@ __pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAc return __internal::__brick_unique(__first, __last, __pred, _IsVector{}); } return __internal::__remove_elements( + __tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { 
__internal::__brick_walk3( @@ -4738,7 +4739,7 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R template _RandomAccessIterator -__pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; @@ -4750,6 +4751,7 @@ __pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando } return __internal::__remove_elements( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { __internal::__brick_walk2( From ed3f0e93e90b819710ba821c11f974f4a9cdabee Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:22:37 +0100 Subject: [PATCH 259/566] internal::lower_bound_impl + tag impls --- .../oneapi/dpl/internal/binary_search_impl.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 3fd155f03a0..cd5b384a46b 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -68,12 +68,14 @@ struct custom_brick } }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +lower_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__is_backend_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { 
return ::std::lower_bound(start, end, val, comp) - start; @@ -105,11 +107,11 @@ binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, In } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +lower_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(); From 925553c2fd27082ac4a5253cc1d9bfc27d0eb82b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:22:45 +0100 Subject: [PATCH 260/566] internal::lower_bound_impl + tag calls --- include/oneapi/dpl/internal/binary_search_impl.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index cd5b384a46b..ffa1de8896d 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -218,8 +218,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy lower_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::lower_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::lower_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template lower_bound(Policy&& policy, 
InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::lower_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, comp); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::lower_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Lower Bound end From ce75efc204ff8252e737b85b4f6b448313a10701 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:26:46 +0100 Subject: [PATCH 261/566] internal::binary_search_impl + tag impls --- .../oneapi/dpl/internal/binary_search_impl.h | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index ffa1de8896d..2c9873c7962 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -94,12 +94,14 @@ upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +binary_search_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__is_backend_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::binary_search(start, end, val, comp); @@ -175,15 +177,13 @@ upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu return result + value_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy 
-binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +binary_search_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using __backend_tag = typename decltype(__tag)::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); From 08e00e8d35733b4863a3887150bc5dd650ada9f4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:26:52 +0100 Subject: [PATCH 262/566] internal::binary_search_impl + tag calls --- include/oneapi/dpl/internal/binary_search_impl.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 2c9873c7962..198b9c27170 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -268,8 +268,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy binary_search(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::binary_search_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::binary_search_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template binary_search(Policy&& policy, InputIterator1 
start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::binary_search_impl(::std::forward(policy), start, end, value_start, value_end, result, - comp); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::binary_search_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Binary search end From 6b831c6734a026382286f349d39562703de8a2c5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:30:27 +0100 Subject: [PATCH 263/566] internal::pattern_exclusive_scan_by_segment + tag impls --- .../internal/exclusive_scan_by_segment_impl.h | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index 79d8239ea12..ee000e175aa 100644 --- a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -37,12 +37,15 @@ class ExclusiveScan1; template class ExclusiveScan2; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) +template +OutputIterator +pattern_exclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, + Operator binary_op) { + static_assert(__is_backend_tag_v<_Tag>); + const auto n = ::std::distance(first1, last1); // Check for empty and single element ranges @@ -160,11 +163,12 @@ exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIter return result + n; } -template 
-oneapi::dpl::__internal::__enable_if_hetero_execution_policy -pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) +template +OutputIterator +pattern_exclusive_scan_by_segment(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op) { return internal::exclusive_scan_by_segment_impl( ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, From c88c653a6fe9ae4d06e06e0c01b26e8283e5c02f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:30:36 +0100 Subject: [PATCH 264/566] internal::pattern_exclusive_scan_by_segment + tag calls --- .../oneapi/dpl/internal/exclusive_scan_by_segment_impl.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index ee000e175aa..7e25849a09f 100644 --- a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -185,8 +185,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) { - return internal::pattern_exclusive_scan_by_segment(::std::forward(policy), first1, last1, first2, result, - init, binary_pred, binary_op); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::pattern_exclusive_scan_by_segment(__dispatch_tag, ::std::forward(policy), first1, last1, + first2, result, init, binary_pred, binary_op); } template Date: Fri, 16 Feb 2024 13:32:54 
+0100 Subject: [PATCH 265/566] internal::pattern_inclusive_scan_by_segment + tag impls --- .../internal/inclusive_scan_by_segment_impl.h | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h index 0f0433b2bb8..c92b6127b70 100644 --- a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h @@ -36,12 +36,15 @@ namespace internal template class InclusiveScan1; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) +OutputIterator +pattern_inclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, + BinaryOperator binary_op) { + static_assert(__is_backend_tag_v<_Tag>); + const auto n = ::std::distance(first1, last1); // Check for empty and single element ranges @@ -123,11 +126,12 @@ inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIter return result + n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) +template +OutputIterator +pattern_inclusive_scan_by_segment(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op) { return internal::inclusive_scan_by_segment_impl( ::std::forward(policy), first1, last1, first2, result, binary_pred, 
binary_op, From 9c1447355ac2de2a409a19ece8d0e346c3925956 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:33:00 +0100 Subject: [PATCH 266/566] internal::pattern_inclusive_scan_by_segment + tag calls --- .../oneapi/dpl/internal/inclusive_scan_by_segment_impl.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h index c92b6127b70..a44ebd6b617 100644 --- a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h @@ -148,8 +148,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) { - return internal::pattern_inclusive_scan_by_segment(::std::forward(policy), first1, last1, first2, result, - binary_pred, binary_op); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::pattern_inclusive_scan_by_segment(__dispatch_tag, ::std::forward(policy), first1, last1, + first2, result, binary_pred, binary_op); } template From b6b67966ade6a0a7dd8212c686fbca1d1307ad57 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:39:53 +0100 Subject: [PATCH 267/566] internal::reduce_by_segment_impl + tag impls --- .../dpl/internal/reduce_by_segment_impl.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 0102d6c2925..e664e563b4d 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -78,13 +78,15 @@ class Reduce3; template class Reduce4; -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy> -reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, +::std::pair +reduce_by_segment_impl(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, BinaryOperator binary_op) { + static_assert(__is_backend_tag_v<_Tag>); + // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated // keys for the values are equal to the adjacent key. This function's implementation is a derivative work // and responsible for the second copyright notice in this header. @@ -570,11 +572,11 @@ __sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& return __end_idx.get_host_access()[0] + 1; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy> -reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, +template +::std::pair +reduce_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, BinaryOperator binary_op) { // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated From 4209762b15686a9f0af2f95d82873e3420436a06 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:40:01 +0100 Subject: [PATCH 268/566] internal::reduce_by_segment_impl + tag calls --- include/oneapi/dpl/internal/reduce_by_segment_impl.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index e664e563b4d..8a44c0836bb 100644 --- 
a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -626,8 +626,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy(policy), first1, last1, first2, result1, result2, - binary_pred, binary_op); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, InputIterator1, InputIterator2, OutputIterator1, + OutputIterator2>(); + + return internal::reduce_by_segment_impl(__dispatch_tag, ::std::forward(policy), first1, last1, first2, + result1, result2, binary_pred, binary_op); } template Date: Fri, 16 Feb 2024 13:44:14 +0100 Subject: [PATCH 269/566] internal::upper_bound_impl + tag impls --- .../oneapi/dpl/internal/binary_search_impl.h | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 198b9c27170..252e7f37009 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -82,12 +82,14 @@ lower_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +upper_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__is_backend_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::upper_bound(start, end, val, comp) - start; @@ -143,15 +145,13 @@ lower_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator return result + value_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy 
-upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +upper_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using __backend_tag = typename decltype(__tag)::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); From 1ffe860eb495b986580a72f7b5c469b071dedc03 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:44:22 +0100 Subject: [PATCH 270/566] internal::upper_bound_impl + tag calls --- include/oneapi/dpl/internal/binary_search_impl.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 252e7f37009..8a6f6b2a910 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -246,8 +246,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy upper_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::upper_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::upper_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template upper_bound(Policy&& policy, InputIterator1 start, 
InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::upper_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, comp); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend(); + + return internal::upper_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Upper Bound end From 1dcc5f23c5907f04a855193fa20853235b4581b0 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:54:17 +0100 Subject: [PATCH 271/566] internal::exclusive_scan_by_segment_impl + tag impls --- .../internal/exclusive_scan_by_segment_impl.h | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index 7e25849a09f..ddc9b774abb 100644 --- a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -94,23 +94,23 @@ pattern_exclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, - ::std::true_type /* has_known_identity*/) +template +OutputIterator +exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, + Operator binary_op, ::std::true_type /* has_known_identity*/) { return internal::__scan_by_segment_impl_common(::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, 
::std::false_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, - ::std::false_type /* has_known_identity*/) +template +OutputIterator +exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, + Operator binary_op, ::std::false_type /* has_known_identity*/) { const auto n = ::std::distance(first1, last1); From 5543b62163c14c9300e02124957c0132bf6e08ec Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:54:47 +0100 Subject: [PATCH 272/566] internal::exclusive_scan_by_segment_impl + tag calls --- include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index ddc9b774abb..fb1beb9b3eb 100644 --- a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -171,7 +171,7 @@ pattern_exclusive_scan_by_segment(__hetero_tag<_BackendTag> __tag, Policy&& poli BinaryPredicate binary_pred, Operator binary_op) { return internal::exclusive_scan_by_segment_impl( - ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, + __tag, ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, typename unseq_backend::__has_known_identity< Operator, typename ::std::iterator_traits::value_type>::type{}); } From 4a23e3e39722ec5551c25f436d7c09ba69d52dc4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:59:03 +0100 Subject: [PATCH 273/566] 
internal::inclusive_scan_by_segment_impl + tag impls --- .../internal/inclusive_scan_by_segment_impl.h | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h index a44ebd6b617..64a77e8bd5d 100644 --- a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h @@ -75,12 +75,12 @@ pattern_inclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, - ::std::true_type /* has_known_identity */) +template +OutputIterator +inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, + BinaryOperator binary_op, ::std::true_type /* has_known_identity */) { using iter_value_t = typename ::std::iterator_traits::value_type; iter_value_t identity = unseq_backend::__known_identity; @@ -88,12 +88,12 @@ inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIter identity, binary_pred, binary_op, ::std::true_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, - ::std::false_type /* has_known_identity */) +template +OutputIterator +inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, 
OutputIterator result, BinaryPredicate binary_pred, + BinaryOperator binary_op, ::std::false_type /* has_known_identity */) { typedef unsigned int FlagType; From 40cc8d4632bf9dc9fc936a736b1d63a7cd899f29 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:59:14 +0100 Subject: [PATCH 274/566] internal::inclusive_scan_by_segment_impl + tag calls --- include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h index 64a77e8bd5d..08d92bed504 100644 --- a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h @@ -134,7 +134,7 @@ pattern_inclusive_scan_by_segment(__hetero_tag<_BackendTag> __tag, Policy&& poli BinaryPredicate binary_pred, BinaryOperator binary_op) { return internal::inclusive_scan_by_segment_impl( - ::std::forward(policy), first1, last1, first2, result, binary_pred, binary_op, + __tag, ::std::forward(policy), first1, last1, first2, result, binary_pred, binary_op, typename unseq_backend::__has_known_identity< BinaryOperator, typename ::std::iterator_traits::value_type>::type{}); } From fe6bc53e40f8214b0a1e51c1ae86c03f3f7eb845 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:07:21 +0100 Subject: [PATCH 275/566] internal::__sycl_reduce_by_segment + tag impls --- .../dpl/internal/reduce_by_segment_impl.h | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 8a44c0836bb..35465ffc5d1 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -190,26 +190,24 @@ template using _SegReducePrefixPhase = __seg_reduce_prefix_kernel<_Name...>; } // 
namespace -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, - ::std::false_type /* has_known_identity */) +template +oneapi::dpl::__internal::__difference_t<_Range3> +__sycl_reduce_by_segment(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, + _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, + _BinaryOperator __binary_op, ::std::false_type /* has_known_identity */) { return oneapi::dpl::experimental::ranges::reduce_by_segment( ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__keys), ::std::forward<_Range2>(__values), ::std::forward<_Range3>(__out_keys), ::std::forward<_Range4>(__out_values), __binary_pred, __binary_op); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, - ::std::true_type /* has_known_identity */) +template +oneapi::dpl::__internal::__difference_t<_Range3> +__sycl_reduce_by_segment(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, + _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, + _BinaryOperator __binary_op, ::std::true_type /* has_known_identity */) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; From 6697736479e95532b4696d7e8079a7f484fc3fef Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:07:47 +0100 Subject: [PATCH 276/566] 
internal::__sycl_reduce_by_segment + tag calls --- include/oneapi/dpl/internal/reduce_by_segment_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 35465ffc5d1..860664bde67 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -573,7 +573,7 @@ __sycl_reduce_by_segment(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ template ::std::pair -reduce_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +reduce_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, BinaryOperator binary_op) { @@ -609,9 +609,9 @@ reduce_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator typename ::std::iterator_traits::value_type>::type; // number of unique keys - _CountType __n = __sycl_reduce_by_segment(::std::forward(policy), key_buf.all_view(), value_buf.all_view(), - key_output_buf.all_view(), value_output_buf.all_view(), binary_pred, - binary_op, has_known_identity{}); + _CountType __n = __sycl_reduce_by_segment( + __tag, ::std::forward(policy), key_buf.all_view(), value_buf.all_view(), key_output_buf.all_view(), + value_output_buf.all_view(), binary_pred, binary_op, has_known_identity{}); return ::std::make_pair(result1 + __n, result2 + __n); } From c195783439bb99b94088548f49483173b382b95e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:10:00 +0100 Subject: [PATCH 277/566] internal::__scan_by_segment_impl_common + tag impls --- include/oneapi/dpl/internal/scan_by_segment_impl.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h 
b/include/oneapi/dpl/internal/scan_by_segment_impl.h index 2d2cafaa038..98cc9e1d93a 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -364,11 +364,12 @@ struct __sycl_scan_by_segment_impl } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -__scan_by_segment_impl_common(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, Inclusive) +template +OutputIterator +__scan_by_segment_impl_common(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, + Operator binary_op, Inclusive) { const auto n = ::std::distance(first1, last1); From 41980d370a24c7423e94d6a3754e05a159811c83 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:10:07 +0100 Subject: [PATCH 278/566] internal::__scan_by_segment_impl_common + tag calls --- .../oneapi/dpl/internal/exclusive_scan_by_segment_impl.h | 6 +++--- .../oneapi/dpl/internal/inclusive_scan_by_segment_impl.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index fb1beb9b3eb..07d5b930c2f 100644 --- a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -97,12 +97,12 @@ pattern_exclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, template OutputIterator -exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, 
BinaryPredicate binary_pred, Operator binary_op, ::std::true_type /* has_known_identity*/) { - return internal::__scan_by_segment_impl_common(::std::forward(policy), first1, last1, first2, result, init, - binary_pred, binary_op, ::std::false_type{}); + return internal::__scan_by_segment_impl_common(__tag, ::std::forward(policy), first1, last1, first2, result, + init, binary_pred, binary_op, ::std::false_type{}); } template OutputIterator -inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, ::std::true_type /* has_known_identity */) { using iter_value_t = typename ::std::iterator_traits::value_type; iter_value_t identity = unseq_backend::__known_identity; - return internal::__scan_by_segment_impl_common(::std::forward(policy), first1, last1, first2, result, + return internal::__scan_by_segment_impl_common(__tag, ::std::forward(policy), first1, last1, first2, result, identity, binary_pred, binary_op, ::std::true_type{}); } From 1e2b03485c1330fd3abd6c852820cab14175d86b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:18:37 +0100 Subject: [PATCH 279/566] internal::__pattern_transform_scan_base_async + tag impls --- .../dpl/internal/async_impl/async_impl_hetero.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index d14f2aed022..6c151386e03 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -208,13 +208,12 @@ __pattern_fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa // transform_scan 
//------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_scan_base_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, - _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_base_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _InitType __init, _BinaryOperation __binary_op, _Inclusive) { assert(__first < __last); From 868e686d09a200117fdd652bd6e256e53880d2a6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:18:48 +0100 Subject: [PATCH 280/566] internal::__pattern_transform_scan_base_async + tag calls --- .../dpl/internal/async_impl/async_impl_hetero.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index 6c151386e03..c8b40ce399e 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -224,7 +224,7 @@ __pattern_transform_scan_base_async(__hetero_tag<_BackendTag>, _ExecutionPolicy& auto __buf2 = __keep2(__result, __result + __n); auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - oneapi::dpl::__internal::__fpga_backend_tag{}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, __init, __binary_op, _Inclusive{}); return __res.__make_future(__result + __n); @@ -240,8 +240,12 @@ __pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _I using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return 
__pattern_transform_scan_base_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + + return __pattern_transform_scan_base_async(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, __unary_op, _InitType{__init}, __binary_op, + _Inclusive{}); } // scan without initial element @@ -256,8 +260,11 @@ __pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _I using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_ValueType>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{}, __binary_op, _Inclusive{}); + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); + + return __pattern_transform_scan_base_async(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); } } // namespace __internal From a728717e678c9dbebcc2a5bf1fe4689c820c52ca Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:24:02 +0100 Subject: [PATCH 281/566] internal::__pattern_transform_scan_async + tag impls --- .../internal/async_impl/async_impl_hetero.h | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index c8b40ce399e..dc08e7ba2d1 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -230,41 +230,34 @@ __pattern_transform_scan_base_async(__hetero_tag<_BackendTag>, 
_ExecutionPolicy& return __res.__make_future(__result + __n); } -template = 0> +template auto -__pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _Type __init, + _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - - return __pattern_transform_scan_base_async(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, - __last, __result, __unary_op, _InitType{__init}, __binary_op, - _Inclusive{}); + return __pattern_transform_scan_base_async(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } // scan without initial element -template = 0> +template auto -__pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _BinaryOperation __binary_op, _Inclusive) { using _ValueType = typename ::std::iterator_traits<_Iterator1>::value_type; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_ValueType>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - 
- return __pattern_transform_scan_base_async(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, - __last, __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); + return __pattern_transform_scan_base_async(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); } } // namespace __internal From 38e5f66b20d1689bef95e45372c6d88cd5fecc5b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 14:24:20 +0100 Subject: [PATCH 282/566] internal::__pattern_transform_scan_async + tag calls --- .../dpl/internal/async_impl/glue_async_impl.h | 49 ++++++++++++++----- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index 3cceec2f5cb..d89da19493e 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -234,10 +234,13 @@ auto inclusive_scan_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt1 __last1, _ForwardIt2 __first2, _BinaryOperation __binary_op, _Events&&... 
__dependencies) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIt1, _ForwardIt2>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __binary_op, /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __binary_op, /*inclusive=*/::std::true_type()); } template (); + wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, __binary_op, /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, __binary_op, /*inclusive=*/::std::true_type()); } template (); + wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, ::std::plus<_T>(), /*exclusive=*/::std::false_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, ::std::plus<_T>(), /*exclusive=*/::std::false_type()); } template (); + wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, __binary_op, /*exclusive=*/::std::false_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + 
oneapi::dpl::__internal::__no_op(), __init, __binary_op, /*exclusive=*/::std::false_type()); } template (); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, + return oneapi::dpl::__internal::__pattern_transform_scan_async(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __init, __binary_op, /*exclusive=*/::std::false_type()); } @@ -302,10 +318,13 @@ transform_inclusive_scan_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt2 __first2, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Events&&... __dependencies) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIt1, _ForwardIt2>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __first2, __unary_op, __binary_op, - /*inclusive=*/::std::true_type()); + return oneapi::dpl::__internal::__pattern_transform_scan_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __binary_op, + /*inclusive=*/::std::true_type()); } template (); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, + return oneapi::dpl::__internal::__pattern_transform_scan_async(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __init, __binary_op, /*inclusive=*/::std::true_type()); } From 665980179d1742d06be5095b0bfef9a7e755fc18 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 19:05:50 +0100 Subject: [PATCH 283/566] Remove __parallel_for implementations without tags --- 
.../pstl/hetero/dpcpp/parallel_backend_sycl.h | 12 ------------ .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 14 +------------- include/oneapi/dpl/pstl/omp/parallel_for.h | 19 ------------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 7 ------- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 11 ----------- 5 files changed, 1 insertion(+), 62 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 7ee5ba6fb9c..af3724db1e5 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -263,18 +263,6 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>> //General version of parallel_for, one additional parameter - __count of iterations of loop __cgh.parallel_for, //for some algorithms happens that size of processing range is n, but amount of iterations is n/2. -template = 0, typename... _Ranges> -auto -__parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... 
__rngs) -{ - using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; - using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<_CustomName>; - - return __parallel_for_submitter<_ForKernel>()(::std::forward<_ExecutionPolicy>(__exec), __brick, __count, - ::std::forward<_Ranges>(__rngs)...); -} - template auto __parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 224e6b6f9fe..769e5abe4af 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -76,7 +76,7 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... return __future(__event); } -// KSATODO is this define check really required here? +// TODO is this define check really required here? #if _ONEDPL_FPGA_DEVICE template auto @@ -104,18 +104,6 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... #endif // _ONEDPL_FPGA_DEVICE }; -template = 0> -auto -__parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... 
__rngs) -{ - using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; - using __parallel_for_name = __internal::__kernel_name_provider<_CustomName>; - - return __parallel_for_fpga_submitter<__parallel_for_name>()(std::forward<_ExecutionPolicy>(__exec), __brick, - __count, std::forward<_Ranges>(__rngs)...); -} - template auto __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, diff --git a/include/oneapi/dpl/pstl/omp/parallel_for.h b/include/oneapi/dpl/pstl/omp/parallel_for.h index 6f870dc4b4a..1a0ea24d798 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for.h @@ -47,25 +47,6 @@ __parallel_for_body(_Index __first, _Index __last, _Fp __f) // Evaluation of brick f[i,j) for each subrange [i,j) of [first, last) //------------------------------------------------------------------------ -template -void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) -{ - if (omp_in_parallel()) - { - // we don't create a nested parallel region in an existing parallel - // region: just create tasks - oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); - } - else - { - // in any case (nested or non-nested) one parallel region is created and - // only one thread creates a set of tasks - _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); } - } -} - template void __parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 264947fe550..9d9f3ea2314 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -64,13 +64,6 @@ __cancel_execution() { } -template -void 
-__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) -{ - __f(__first, __last); -} - template void __parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 7639551a1c6..74c1eade762 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -115,17 +115,6 @@ class __parallel_for_body _RealBody _M_body; }; -//! Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) -// wrapper over tbb::parallel_for -template -void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) -{ - tbb::this_task_arena::isolate([=]() { - tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), __parallel_for_body<_Index, _Fp>(__f)); - }); -} - //! Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) // wrapper over tbb::parallel_for template From 3c493ea4a56e6a35470277dfd63a26e240e25658 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 09:55:26 +0100 Subject: [PATCH 284/566] Remove __parallel_transform_scan implementations without tags --- include/oneapi/dpl/pstl/omp/parallel_transform_scan.h | 9 --------- include/oneapi/dpl/pstl/parallel_backend_serial.h | 7 ------- include/oneapi/dpl/pstl/parallel_backend_tbb.h | 11 ----------- 3 files changed, 27 deletions(-) diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h index f0093ccab98..35c28b4330c 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h @@ -25,15 +25,6 @@ namespace dpl namespace __omp_backend { -template -_Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up /* __u */, _Tp __init, _Cp /* __combine */, - _Rp /* __brick_reduce */, _Sp __scan) -{ - // TODO: 
parallelize this function. - return __scan(_Index(0), __n, __init); -} - template _Tp __parallel_transform_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __n, _Up /* __u */, diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 9d9f3ea2314..a73a973de32 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -144,13 +144,6 @@ __parallel_strict_scan(oneapi::dpl::__internal::__serial_backend_tag, _Execution __scan(_Index(0), __n, __initial); } -template -_Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _UnaryOp, _Tp __init, _BinaryOp, _Reduce, _Scan __scan) -{ - return __scan(_Index(0), __n, __init); -} - template _Tp __parallel_transform_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _UnaryOp, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 74c1eade762..2f48fe9cc98 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -485,17 +485,6 @@ __parallel_strict_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPol }); } -template -_Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce, - _Sp __scan) -{ - __trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> __body(__u, __init, __combine, __brick_reduce, __scan); - auto __range = tbb::blocked_range<_Index>(0, __n); - tbb::this_task_arena::isolate([__range, &__body]() { tbb::parallel_scan(__range, __body); }); - return __body.sum(); -} - template _Tp __parallel_transform_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Up __u, From f8321f6c4f93750c67ae1e6f7183a74e4cc19841 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 19:10:12 +0100 Subject: [PATCH 285/566] Remove 
extra changes from __parallel_for_fpga_submitter --- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 30 +------------------ 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 769e5abe4af..8d563fd1cd1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -75,33 +75,6 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... }); return __future(__event); } - -// TODO is this define check really required here? -#if _ONEDPL_FPGA_DEVICE - template - auto - operator()(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, - _Ranges&&... __rngs) const - { - auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); - assert(__n > 0); - - _PRINT_INFO_IN_DEBUG_MODE(__exec); - auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) { - //get an access to data under SYCL buffer: - oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); - - __cgh.single_task<_Name...>([=]() { -# pragma unroll(::std::decay <_ExecutionPolicy>::type::unroll_factor) - for (auto __idx = 0; __idx < __count; ++__idx) - { - __brick(__idx, __rngs...); - } - }); - }); - return __future(__event); - } -#endif // _ONEDPL_FPGA_DEVICE }; template @@ -112,8 +85,7 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& _ using _Policy = ::std::decay_t<_ExecutionPolicy>; using __parallel_for_name = __internal::__kernel_name_provider; - return __parallel_for_fpga_submitter<__parallel_for_name>()(oneapi::dpl::__internal::__fpga_backend_tag{}, - std::forward<_ExecutionPolicy>(__exec), __brick, + return __parallel_for_fpga_submitter<__parallel_for_name>()(std::forward<_ExecutionPolicy>(__exec), __brick, __count, 
std::forward<_Ranges>(__rngs)...); } From f2c5a7b2856ba79894d23ac3164a43af28a55761 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:39:54 +0100 Subject: [PATCH 286/566] Remove __parallel_stable_sort implementations without tags --- .../dpl/pstl/omp/parallel_stable_sort.h | 41 ------------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 ---- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 25 ----------- 3 files changed, 74 deletions(-) diff --git a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h index 2c5c760f584..4633a3fcade 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h +++ b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h @@ -121,47 +121,6 @@ __parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __ } } -template -void -__parallel_stable_sort(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator __xs, _RandomAccessIterator __xe, - _Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0) -{ - auto __count = static_cast(__xe - __xs); - if (__count <= __default_chunk_size || __nsort < __count) - { - __leaf_sort(__xs, __xe, __comp); - return; - } - - // TODO: the partial sort implementation should - // be shared with the other backends. 
- - if (omp_in_parallel()) - { - if (__count <= __nsort) - { - oneapi::dpl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); - } - else - { - oneapi::dpl::__omp_backend::__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort); - } - } - else - { - _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) - if (__count <= __nsort) - { - oneapi::dpl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); - } - else - { - oneapi::dpl::__omp_backend::__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort); - } - } -} - template void __parallel_stable_sort(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index a73a973de32..cfb799fc73a 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -152,14 +152,6 @@ __parallel_transform_scan(oneapi::dpl::__internal::__serial_backend_tag, _Execut return __scan(_Index(0), __n, __init); } -template -void -__parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _LeafSort __leaf_sort, ::std::size_t = 0) -{ - __leaf_sort(__first, __last, __comp); -} - template void __parallel_stable_sort(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 2f48fe9cc98..1a4bd50a68e 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1248,31 +1248,6 @@ __stable_sort_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _Le return __self; } -template -void -__parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, 
- _LeafSort __leaf_sort, ::std::size_t __nsort) -{ - tbb::this_task_arena::isolate([=, &__nsort]() { - //sorting based on task tree and parallel merge - typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _ValueType; - typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; - const _DifferenceType __n = __xe - __xs; - - const _DifferenceType __sort_cut_off = _ONEDPL_STABLE_SORT_CUT_OFF; - if (__n > __sort_cut_off) - { - __tbb_backend::__buffer<_ExecutionPolicy, _ValueType> __buf(__n); - __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{ - __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()}; - __task::spawn_root_and_wait(__root); - return; - } - //serial sort - __leaf_sort(__xs, __xe, __comp); - }); -} - template void __parallel_stable_sort(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __xs, From a97ec6f14c84fecd8521e9e8b529903f2259bc56 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:14:56 +0100 Subject: [PATCH 287/566] Remove __parallel_or implementations without tags --- include/oneapi/dpl/pstl/parallel_impl.h | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index 2d3f327bf53..846fc993e8c 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -110,25 +110,6 @@ __parallel_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Index __f // parallel_or //------------------------------------------------------------------------ //! 
Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last) -template -bool -__parallel_or(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Index>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - ::std::atomic __found(false); - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__f, &__found](_Index __i, _Index __j) { - if (!__found.load(::std::memory_order_relaxed) && __f(__i, __j)) - { - __found.store(true, ::std::memory_order_relaxed); - __par_backend::__cancel_execution(); - } - }); - return __found; -} - template bool __parallel_or(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) From 25efb732527fabe2d2a70b3c5a491eac30ea507e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:23:36 +0100 Subject: [PATCH 288/566] Remove __parallel_find implementations without tags --- include/oneapi/dpl/pstl/parallel_impl.h | 37 ------------------------- 1 file changed, 37 deletions(-) diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index 846fc993e8c..fc5507f50d1 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -32,43 +32,6 @@ namespace __internal //----------------------------------------------------------------------- /** Return extremum value returned by brick f[i,j) for subranges [i,j) of [first,last) Each f[i,j) must return a value in [i,j). */ -template -_Index -__parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) -{ - typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; - const _DifferenceType __n = __last - __first; - _DifferenceType __initial_dist = _IsFirst::value ? 
__n : -1; - - constexpr auto __comp = ::std::conditional_t<_IsFirst::value, __pstl_less, __pstl_greater>{}; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Index>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - ::std::atomic<_DifferenceType> __extremum(__initial_dist); - // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { - // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of - // why using a shared variable scales fairly well in this situation. - if (__comp(__i - __first, __extremum)) - { - _Index __res = __f(__i, __j); - // If not '__last' returned then we found what we want so put this to extremum - if (__res != __j) - { - const _DifferenceType __k = __res - __first; - for (_DifferenceType __old = __extremum; __comp(__k, __old); - __old = __extremum) - { - __extremum.compare_exchange_weak(__old, __k); - } - } - } - }); - return __extremum != __initial_dist ? 
__first + __extremum : __last; -} - template _Index __parallel_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, From a081f6e44b451e01c9cc57e7644f6a40efd22e68 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:27:28 +0100 Subject: [PATCH 289/566] Remove __parallel_merge implementations without tags --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 27 ---------------- include/oneapi/dpl/pstl/omp/parallel_merge.h | 31 ------------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 10 ------ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 29 ----------------- 4 files changed, 97 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index af3724db1e5..ada6a261fd3 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1841,33 +1841,6 @@ struct __parallel_merge_submitter<_IdType, __internal::__optional_kernel_name<_N template class __merge_kernel_name; -template = 0> -auto -__parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) -{ - using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; - - const auto __n = __rng1.size() + __rng2.size(); - if (__n <= std::numeric_limits<::std::uint32_t>::max()) - { - using _wi_index_type = ::std::uint32_t; - using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__merge_kernel_name<_CustomName, _wi_index_type>>; - return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); - - } - else - { - using _wi_index_type = ::std::uint64_t; - using _MergeKernel = 
oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__merge_kernel_name<_CustomName, _wi_index_type>>; - return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); - } -} - template auto __parallel_merge(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, diff --git a/include/oneapi/dpl/pstl/omp/parallel_merge.h b/include/oneapi/dpl/pstl/omp/parallel_merge.h index 39e8606d4ea..162ef097801 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_merge.h +++ b/include/oneapi/dpl/pstl/omp/parallel_merge.h @@ -68,37 +68,6 @@ __parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessI _PSTL_PRAGMA(omp taskwait) } -template -void -__parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) -{ - std::size_t __size_x = __xe - __xs; - std::size_t __size_y = __ye - __ys; - - /* - * Run the merge in parallel by chunking it up. Use the smaller range (if any) as the iteration range, and the - * larger range as the search range. 
- */ - - if (omp_in_parallel()) - { - oneapi::dpl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, - __leaf_merge); - } - else - { - _PSTL_PRAGMA(omp parallel) - { - _PSTL_PRAGMA(omp single nowait) - oneapi::dpl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, - __leaf_merge); - } - } -} - template void diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index cfb799fc73a..ff8f5e162e2 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -160,16 +160,6 @@ __parallel_stable_sort(oneapi::dpl::__internal::__serial_backend_tag, _Execution __leaf_sort(__first, __last, __comp); } -template -void -__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __outit, - _Compare __comp, _LeafMerge __leaf_merge) -{ - __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); -} - template void diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 1a4bd50a68e..47c43867d54 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1339,35 +1339,6 @@ operator()(__task* __self) return __self; } -template -void -__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) -{ - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; - typedef typename ::std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; - typedef typename ::std::common_type_t<_DifferenceType1, 
_DifferenceType2> _SizeType; - const _SizeType __n = (__xe - __xs) + (__ye - __ys); - const _SizeType __merge_cut_off = _ONEDPL_MERGE_CUT_OFF; - if (__n <= __merge_cut_off) - { - // Fall back on serial merge - __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); - } - else - { - tbb::this_task_arena::isolate([=]() { - typedef __merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, - _Compare, _LeafMerge> - _TaskType; - __root_task<_TaskType> __root{__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge}; - __task::spawn_root_and_wait(__root); - }); - } -} - template void From 57119b1fe7f2d2bf0964c6a6e1fea720bfb0eaef Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 11:34:53 +0100 Subject: [PATCH 290/566] Remove __parallel_partial_sort implementations without tags --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index ada6a261fd3..ebbe14843c8 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -2194,25 +2194,6 @@ __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _Execution // parallel_partial_sort - async pattern //----------------------------------------------------------------------- -// TODO: check if it makes sense to move these wrappers out of backend to a common place -// TODO: consider changing __partial_merge_kernel to make it compatible with -// __full_merge_kernel in order to use __parallel_sort_impl routine -template = 0> -auto -__parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, - _Compare __comp) -{ - const auto __mid_idx = __mid - __first; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, 
_Iterator>(); - auto __buf = __keep(__first, __last); - - return __parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), - __partial_merge_kernel{__mid_idx}, __comp); -} - // TODO: check if it makes sense to move these wrappers out of backend to a common place // TODO: consider changing __partial_merge_kernel to make it compatible with // __full_merge_kernel in order to use __parallel_sort_impl routine From 3c37fdd31e9b871698b6c97646a533245b4404fa Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 12:40:45 +0100 Subject: [PATCH 291/566] Remove __parallel_for_each implementations without tags --- .../oneapi/dpl/pstl/omp/parallel_for_each.h | 19 ------------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 -------- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 6 ------ 3 files changed, 33 deletions(-) diff --git a/include/oneapi/dpl/pstl/omp/parallel_for_each.h b/include/oneapi/dpl/pstl/omp/parallel_for_each.h index 9cc4f249f80..32410cbe927 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for_each.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for_each.h @@ -42,25 +42,6 @@ __parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp } } -template -void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f) -{ - if (omp_in_parallel()) - { - // we don't create a nested parallel region in an existing parallel - // region: just create tasks - oneapi::dpl::__omp_backend::__parallel_for_each_body(__first, __last, __f); - } - else - { - // in any case (nested or non-nested) one parallel region is created and - // only one thread creates a set of tasks - _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_each_body(__first, __last, __f); } - } -} - template void __parallel_for_each(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, 
_ForwardIterator __first, diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index ff8f5e162e2..4bdfa18f21c 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -186,14 +186,6 @@ __parallel_invoke(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolic ::std::forward<_F2>(__f2)(); } -template -void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) -{ - for (auto __iter = __begin; __iter != __end; ++__iter) - __f(*__iter); -} - template void __parallel_for_each(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 47c43867d54..bddf9b04849 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1392,12 +1392,6 @@ __parallel_invoke(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&& //------------------------------------------------------------------------ // parallel_for_each //------------------------------------------------------------------------ -template -void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) -{ - tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); -} template void From 81076d59159a9aef700e3bedf27cbc39e31c0354 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 12:41:29 +0100 Subject: [PATCH 292/566] Remove __parallel_invoke implementations without tags --- include/oneapi/dpl/pstl/omp/parallel_invoke.h | 16 ---------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 -------- include/oneapi/dpl/pstl/parallel_backend_tbb.h | 8 -------- 3 files changed, 32 deletions(-) diff --git a/include/oneapi/dpl/pstl/omp/parallel_invoke.h 
b/include/oneapi/dpl/pstl/omp/parallel_invoke.h index 2520526a191..3503096add5 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_invoke.h +++ b/include/oneapi/dpl/pstl/omp/parallel_invoke.h @@ -36,22 +36,6 @@ __parallel_invoke_body(_F1&& __f1, _F2&& __f2) } } -template -void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) -{ - if (omp_in_parallel()) - { - oneapi::dpl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); - } - else - { - _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) - oneapi::dpl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); - } -} - template void __parallel_invoke(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 4bdfa18f21c..b26707adf77 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -170,14 +170,6 @@ __parallel_merge(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); } -template -void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) -{ - ::std::forward<_F1>(__f1)(); - ::std::forward<_F2>(__f2)(); -} - template void __parallel_invoke(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index bddf9b04849..a43665f05b6 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1371,14 +1371,6 @@ __parallel_merge(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, //------------------------------------------------------------------------ // parallel_invoke 
//------------------------------------------------------------------------ -template -void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) -{ - //TODO: a version of tbb::this_task_arena::isolate with variadic arguments pack should be added in the future - tbb::this_task_arena::isolate( - [&]() { tbb::parallel_invoke(::std::forward<_F1>(__f1), ::std::forward<_F2>(__f2)); }); -} template void From 9ccfa70c3829ec1fc4f28ac9f3c055f359015bd2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 12:43:11 +0100 Subject: [PATCH 293/566] Remove __parallel_reduce implementations without tags --- include/oneapi/dpl/pstl/omp/parallel_reduce.h | 27 ------------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 15 ----------- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 15 ----------- 3 files changed, 57 deletions(-) diff --git a/include/oneapi/dpl/pstl/omp/parallel_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_reduce.h index a59a56d374f..4fc62cdf3d8 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_reduce.h @@ -50,33 +50,6 @@ __parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __la // c(x,y) combines values x and y that were the result of r //------------------------------------------------------------------------ -template -_Value -__parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, - _RealBody __real_body, _Reduction __reduction) -{ - // We don't create a nested parallel region in an existing parallel region: - // just create tasks. - if (omp_in_parallel()) - { - return oneapi::dpl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, - __reduction); - } - - // In any case (nested or non-nested) one parallel region is created and only - // one thread creates a set of tasks. 
- _Value __res = __identity; - - _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) - { - __res = - oneapi::dpl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); - } - - return __res; -} - template _Value __parallel_reduce(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index b26707adf77..1e597f277c9 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -72,21 +72,6 @@ __parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&& __f(__first, __last); } -template -_Value -__parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, - const _RealBody& __real_body, const _Reduction&) -{ - if (__first == __last) - { - return __identity; - } - else - { - return __real_body(__first, __last, __identity); - } -} - template _Value __parallel_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index a43665f05b6..e85ce3c4cf6 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -128,21 +128,6 @@ __parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ //! 
Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) // wrapper over tbb::parallel_reduce -template -_Value -__parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, - const _RealBody& __real_body, const _Reduction& __reduction) -{ - return tbb::this_task_arena::isolate([__first, __last, &__identity, &__real_body, &__reduction]() -> _Value { - return tbb::parallel_reduce( - tbb::blocked_range<_Index>(__first, __last), __identity, - [__real_body](const tbb::blocked_range<_Index>& __r, const _Value& __value) -> _Value { - return __real_body(__r.begin(), __r.end(), __value); - }, - __reduction); - }); -} - template _Value __parallel_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, From 2f7f3ecedecaaa166001587439282fc6f08ee52a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 12:44:13 +0100 Subject: [PATCH 294/566] Remove __parallel_strict_scan implementations without tags --- include/oneapi/dpl/pstl/omp/parallel_scan.h | 36 ----------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 13 ------ .../oneapi/dpl/pstl/parallel_backend_tbb.h | 40 ------------------- 3 files changed, 89 deletions(-) diff --git a/include/oneapi/dpl/pstl/omp/parallel_scan.h b/include/oneapi/dpl/pstl/omp/parallel_scan.h index 7001d4267b3..d3609152319 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_scan.h @@ -106,42 +106,6 @@ __parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combi __initial, __combine, __scan); } -template -void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) -{ - if (__n <= __default_chunk_size) - { - _Tp __sum = __initial; - if (__n) - { - __sum = __combine(__sum, __reduce(_Index(0), __n)); - } - __apex(__sum); - if (__n) - { - __scan(_Index(0), __n, __initial); - } - return; - } - - if 
(omp_in_parallel()) - { - oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, - __scan, __apex); - } - else - { - _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) - { - oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, - __combine, __scan, __apex); - } - } -} - template void __parallel_strict_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 1e597f277c9..54704bbcf6c 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -103,19 +103,6 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__serial_backend_tag, _Exec return __reduce(__first, __last, __init); } -template -void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) -{ - _Tp __sum = __initial; - if (__n) - __sum = __combine(__sum, __reduce(_Index(0), __n)); - __apex(__sum); - if (__n) - __scan(_Index(0), __n, __initial); -} - template void __parallel_strict_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index e85ce3c4cf6..7687094382b 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -390,46 +390,6 @@ __downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsi // apex is called exactly once, after all calls to reduce and before all calls to scan. // For example, it's useful for allocating a __buffer used by scan but whose size is the sum of all reduction values. // T must have a trivial constructor and destructor. 
-template -void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) -{ - tbb::this_task_arena::isolate([=, &__combine]() { - if (__n > 1) - { - _Index __p = tbb::this_task_arena::max_concurrency(); - const _Index __slack = 4; - _Index __tilesize = (__n - 1) / (__slack * __p) + 1; - _Index __m = (__n - 1) / __tilesize; - __tbb_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m + 1); - _Tp* __r = __buf.get(); - __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, - __combine); - - // When __apex is a no-op and __combine has no side effects, a good optimizer - // should be able to eliminate all code between here and __apex. - // Alternatively, provide a default value for __apex that can be - // recognized by metaprogramming that conditionlly executes the following. - size_t __k = __m + 1; - _Tp __t = __r[__k - 1]; - while ((__k &= __k - 1)) - __t = __combine(__r[__k - 1], __t); - __apex(__combine(__initial, __t)); - __tbb_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, - __combine, __scan); - return; - } - // Fewer than 2 elements in sequence, or out of memory. Handle has single block. 
- _Tp __sum = __initial; - if (__n) - __sum = __combine(__sum, __reduce(_Index(0), __n)); - __apex(__sum); - if (__n) - __scan(_Index(0), __n, __initial); - }); -} - template void __parallel_strict_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, From a3476cd702fbf6bc44638f1127f28134083b3eeb Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 13:11:59 +0100 Subject: [PATCH 295/566] Remove __parallel_transform_reduce implementations without tags --- .../dpl/pstl/omp/parallel_transform_reduce.h | 29 ------------------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 8 ----- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 12 -------- 3 files changed, 49 deletions(-) diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h index aa400692f51..2c6cf06577b 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h @@ -83,35 +83,6 @@ __transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __l return __init; } -template -_Value -__parallel_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryOp __unary_op, _Value __init, _Combiner __combiner, _Reduction __reduction) -{ - _Value __result = __init; - if (omp_in_parallel()) - { - // We don't create a nested parallel region in an existing parallel - // region: just create tasks - __result = oneapi::dpl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner, - __reduction); - } - else - { - // Create a parallel region, and a single thread will create tasks - // for the region. 
- _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) - { - __result = oneapi::dpl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, - __combiner, __reduction); - } - } - - return __result; -} - template _Value diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 54704bbcf6c..5876e3383d7 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -87,14 +87,6 @@ __parallel_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolic } } -template -_Tp -__parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, - _Reduce __reduce) -{ - return __reduce(__first, __last, __init); -} - template _Tp __parallel_transform_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 7687094382b..b67e7c7931d 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -209,18 +209,6 @@ struct __par_trans_red_body } }; -template -_Tp -__parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _Up __u, _Tp __init, _Cp __combine, - _Rp __brick_reduce) -{ - __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); - // The grain size of 3 is used in order to provide minimum 2 elements for each body - tbb::this_task_arena::isolate( - [__first, __last, &__body]() { tbb::parallel_reduce(tbb::blocked_range<_Index>(__first, __last, 3), __body); }); - return __body.sum(); -} - template _Tp __parallel_transform_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, From 11dc7750c48cde521d6c1726b86f6403faa4515a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> 
Date: Thu, 15 Feb 2024 17:42:20 +0100 Subject: [PATCH 296/566] Remove old implementations with __enable_if_host_execution_policy --- include/oneapi/dpl/pstl/algorithm_fwd.h | 630 +----- include/oneapi/dpl/pstl/algorithm_impl.h | 2635 +--------------------- include/oneapi/dpl/pstl/numeric_fwd.h | 54 - include/oneapi/dpl/pstl/numeric_impl.h | 144 -- 4 files changed, 55 insertions(+), 3408 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index e456f7a5093..bcf5fa3b829 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -46,20 +46,10 @@ bool __brick_any_of(const _RandomAccessIterator, const _RandomAccessIterator, _Pred, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred, _IsVector, - /*parallel=*/::std::false_type) noexcept; - template bool __pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred, _IsVector, - /*parallel=*/::std::true_type); - template bool __pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred); @@ -78,11 +68,6 @@ template void __brick_walk1(_RandomAccessIterator, _RandomAccessIterator, _Function, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - template void __pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function) noexcept; @@ -109,20 +94,10 @@ template , 
_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick, - /*parallel=*/::std::false_type) noexcept; - template void __pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Brick, - /*parallel=*/::std::true_type); - template void __pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -140,38 +115,18 @@ template _RandomAccessIterator __brick_walk1_n(_RandomAccessIterator, _DifferenceType, _Function, /*vectorTag=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Function, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator __pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator __pattern_walk1_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Brick, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator __pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Brick) noexcept; 
-template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk_brick_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator __pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick); @@ -198,11 +153,6 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - template _ForwardIterator2 __pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function) noexcept; @@ -232,34 +182,17 @@ _ForwardIterator2 __pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - template _ForwardIterator2 __pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Function, _IsVector, - /*parallel=*/::std::true_type); - template _RandomAccessIterator2 __pattern_walk2_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, 
_ForwardIterator2, _Brick, - /*parallel=*/::std::false_type) noexcept; - template _ForwardIterator2 __pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, @@ -290,21 +223,11 @@ _ForwardIterator2 __pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Brick); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick, - /*parallel=*/::std::false_type) noexcept; - template _ForwardIterator2 __pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Brick, - /*parallel=*/::std::true_type); - template _RandomAccessIterator2 @@ -326,13 +249,6 @@ _RandomAccessIterator3 __brick_walk3(_RandomAccessIterator1, _RandomAccessIterat _RandomAccessIterator3, _Function, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, - _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - template _ForwardIterator3 @@ -375,26 +291,12 @@ __pattern_walk3(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ // transform_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, 
_Function __func, _IsVector __is_vector, - _IsParallel __is_parallel) noexcept; - template _ForwardIterator2 __pattern_walk2_transform_if(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - _IsVector __is_vector, _IsParallel __is_parallel) noexcept; - template _ForwardIterator3 @@ -413,23 +315,11 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _BinaryPredicate, - _IsVector, /* is_parallel = */ ::std::false_type) noexcept; - template bool __pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); - template bool @@ -444,23 +334,11 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; - template bool __pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, 
_RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); - template bool @@ -501,25 +379,11 @@ _RandomAccessIterator1 __brick_find_end(_RandomAccessIterator1, _RandomAccessIte _RandomAccessIterator2, _BinaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator1 __pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_end(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator1 @@ -540,23 +404,11 @@ _RandomAccessIterator1 __brick_find_first_of(_RandomAccessIterator1, _RandomAcce _RandomAccessIterator2, _BinaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator1 __pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, 
_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); - template _ForwardIterator1 @@ -577,25 +429,11 @@ _RandomAccessIterator1 __brick_search(_RandomAccessIterator1, _RandomAccessItera _RandomAccessIterator2, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_search(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator1 __pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_search(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator1 @@ -616,25 +454,11 @@ _RandomAccessIterator __brick_search_n(_RandomAccessIterator, _RandomAccessIterator, _Size, const _Tp&, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_search_n(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, _BinaryPredicate, - IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator __pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_search_n(_ExecutionPolicy&&, _RandomAccessIterator, 
_RandomAccessIterator, _Size, const _Tp&, - _BinaryPredicate, IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator @@ -708,22 +532,11 @@ void __brick_partition_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, _OutputIterator2, bool*, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryPredicate, _IsVector, - /*parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryPredicate, - _IsVector, /*parallel=*/::std::true_type); - template _RandomAccessIterator2 @@ -744,22 +557,10 @@ typename ::std::iterator_traits<_ForwardIterator>::difference_type __brick_count(_ForwardIterator, _ForwardIterator, _Predicate, /* is_vector = */ ::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate, - /* is_parallel */ ::std::false_type, _IsVector) noexcept; - template typename ::std::iterator_traits<_ForwardIterator>::difference_type __pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Predicate, - /* 
is_parallel */ ::std::true_type, _IsVector); - template typename ::std::iterator_traits<_RandomAccessIterator>::difference_type __pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -777,20 +578,10 @@ template _RandomAccessIterator __brick_unique(_RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_unique(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator __pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_unique(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator __pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -808,12 +599,6 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryPredicate, - _IsVector, /*parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, @@ -829,12 +614,6 @@ _DifferenceType __brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, 
_OutputIterator, - _BinaryPredicate, _IsVector, /*parallel=*/::std::true_type); - template _RandomAccessIterator2 @@ -861,20 +640,10 @@ template void __brick_reverse(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template void __pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _IsVector, - /*is_parallel=*/::std::true_type); - template void __pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator); @@ -891,21 +660,11 @@ template _OutputIterator __brick_reverse_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _OutputIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator2 __pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, @@ -923,20 
+682,10 @@ template _RandomAccessIterator __brick_rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator __pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_rotate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator __pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -955,23 +704,11 @@ _OutputIterator __brick_rotate_copy(_RandomAccessIterator, _RandomAccessIterator _OutputIterator, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator, - _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _OutputIterator, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator2 __pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, 
_RandomAccessIterator1, @@ -989,20 +726,10 @@ template bool __brick_is_partitioned(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template bool __pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); - template bool __pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1020,20 +747,10 @@ template _RandomAccessIterator __brick_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_partition(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator __pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator __pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1051,22 +768,11 @@ template _RandomAccessIterator 
__brick_stable_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _UnaryPredicate, - _IsVector, - /*is_parallelization=*/::std::false_type) noexcept; - template _BidirectionalIterator __pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::true_type); - template _RandomAccessIterator __pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1087,28 +793,12 @@ ::std::pair<_OutputIterator1, _OutputIterator2> __brick_partition_copy(_RandomAc _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, - _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::false_type) noexcept; - template ::std::pair<_OutputIterator1, _OutputIterator2> __pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, - _OutputIterator2, 
_UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::true_type); - template ::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> @@ -1119,23 +809,11 @@ __pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomA // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, - /*is_parallel=*/::std::false_type, _IsMoveConstructible) noexcept; - template void __pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsMoveConstructible) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, - /*is_parallel=*/::std::true_type, - /*is_move_constructible=*/::std::true_type); - template void __pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, @@ -1145,22 +823,10 @@ __pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessItera // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector /*is_vector*/, - /*is_parallel=*/::std::false_type) noexcept; - template void __pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector /*is_vector*/, - 
/*is_parallel=*/::std::true_type); - template void __pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1169,12 +835,6 @@ __pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAcce //------------------------------------------------------------------------ // sort_by_key //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::false_type) noexcept; template @@ -1182,13 +842,6 @@ void __pattern_sort_by_key(_Tag, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::true_type); - template void @@ -1199,23 +852,11 @@ __pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAcce // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template void __pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::true_type); - template void __pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1225,25 +866,11 @@ __pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAcc // partial_sort_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _RandomAccessIterator __pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator __pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, @@ -1263,21 +890,11 @@ _ForwardIterator __brick_adjacent_find(_ForwardIterator, _ForwardIterator, _BinaryPredicate, /* IsVector = */ ::std::false_type, bool) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, - /* is_parallel */ ::std::false_type, _IsVector, _Semantic) 
noexcept; - template _ForwardIterator __pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, _Semantic) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, - /* is_parallel */ ::std::true_type, _IsVector, _Semantic); - template _RandomAccessIterator __pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1286,23 +903,12 @@ __pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAc //------------------------------------------------------------------------ // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector, - /*is_parallel=*/::std::false_type) noexcept; template void __pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector, - /*is_parallel=*/::std::true_type); - template void __pattern_nth_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1314,20 +920,10 @@ __pattern_nth_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAcce template struct __brick_fill; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_fill(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&, - 
/*is_parallel=*/::std::false_type, _IsVector) noexcept; - template void __pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_fill(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&, - /*is_parallel=*/::std::true_type, _IsVector); - template _RandomAccessIterator __pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&); @@ -1335,20 +931,10 @@ __pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessItera template struct __brick_fill_n; -template -_OutputIterator -__pattern_fill_n(_ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; - template _OutputIterator __pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&) noexcept; -template -_RandomAccessIterator -__pattern_fill_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&, - /*is_parallel=*/::std::true_type, _IsVector); - template _RandomAccessIterator __pattern_fill_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&); @@ -1365,20 +951,10 @@ template void __brick_generate(_ForwardIterator, _ForwardIterator, _Generator, /* is_vector = */ ::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_generate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; - template void __pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_generate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, 
_Generator, - /*is_parallel=*/::std::true_type, _IsVector); - template _RandomAccessIterator __pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1392,20 +968,10 @@ template OutputIterator __brick_generate_n(OutputIterator, Size, _Generator, /* is_vector = */ ::std::false_type) noexcept; -template -OutputIterator -__pattern_generate_n(_ExecutionPolicy&&, OutputIterator, Size, _Generator, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; - template _OutputIterator __pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, _Generator) noexcept; -template -_RandomAccessIterator -__pattern_generate_n(_ExecutionPolicy&&, _RandomAccessIterator, Size, _Generator, - /*is_parallel=*/::std::true_type, _IsVector); - template _RandomAccessIterator __pattern_generate_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Generator); @@ -1421,20 +987,10 @@ template _RandomAccessIterator __brick_remove_if(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel*/ ::std::false_type) noexcept; - template _ForwardIterator __pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_remove_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel*/ ::std::true_type); - template _RandomAccessIterator __pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1454,25 +1010,12 @@ _OutputIterator 
__brick_merge(_RandomAccessIterator1, _RandomAccessIterator1, _R _RandomAccessIterator2, _OutputIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; - template _OutputIterator __pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); - template _RandomAccessIterator3 @@ -1491,23 +1034,11 @@ template void __brick_inplace_merge(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator, - _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; - template void __pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::true_type); - template void __pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, 
_RandomAccessIterator, _RandomAccessIterator, @@ -1517,24 +1048,11 @@ __pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAc // includes //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template bool __pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); - template bool @@ -1555,24 +1073,12 @@ _OutputIterator __brick_set_union(_RandomAccessIterator1, _RandomAccessIterator1 _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); - 
template _OutputIterator @@ -1593,26 +1099,12 @@ _OutputIterator __brick_set_intersection(_RandomAccessIterator1, _RandomAccessIt _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator3 @@ -1634,25 +1126,12 @@ _OutputIterator __brick_set_difference(_RandomAccessIterator1, _RandomAccessIter _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, 
_RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator3 @@ -1673,26 +1152,12 @@ _OutputIterator __brick_set_symmetric_difference(_RandomAccessIterator1, _Random _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _OutputIterator __pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, - _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); - template _RandomAccessIterator3 @@ -1712,20 +1177,10 @@ template _RandomAccessIterator __brick_is_heap_until(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; - template _RandomAccessIterator __pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> 
-__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); - template _RandomAccessIterator __pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1743,20 +1198,10 @@ template bool __brick_is_heap(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; - template bool __pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); - template bool __pattern_is_heap(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1774,20 +1219,10 @@ template _RandomAccessIterator __brick_min_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_min_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; - template _ForwardIterator __pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_min_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* 
is_parallel = */ ::std::true_type); - template _RandomAccessIterator __pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1806,22 +1241,10 @@ ::std::pair<_RandomAccessIterator, _RandomAccessIterator> __brick_minmax_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator, _ForwardIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; - template ::std::pair<_ForwardIterator, _ForwardIterator> __pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); - template ::std::pair<_RandomAccessIterator, _RandomAccessIterator> __pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, @@ -1842,25 +1265,11 @@ ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> _Predicate, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator1, _ForwardIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _Predicate, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; - template ::std::pair<_ForwardIterator1, _ForwardIterator2> __pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, 
_ForwardIterator2, _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Predicate, _IsVector, /* is_parallel = */ ::std::true_type); - template ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> @@ -1881,34 +1290,19 @@ bool __brick_lexicographical_compare(_RandomAccessIterator1, _RandomAccessIterat _RandomAccessIterator2, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; - template bool __pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, - _RandomAccessIterator2, _RandomAccessIterator2, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); - template bool __pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, _IsVector, - _IsParallel); +template +_ForwardIterator2 +__pattern_swap(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, 
_Function); //------------------------------------------------------------------------ // shift_left @@ -1924,34 +1318,16 @@ _ForwardIterator __brick_shift_left(_ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, - typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _ForwardIterator __pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, - typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, - /*is_parallel=*/::std::true_type); - template _ForwardIterator __pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_shift_right(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, - typename ::std::iterator_traits<_BidirectionalIterator>::difference_type, _IsVector, - _IsParallel is_parallel); - } // namespace __internal } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 8c4569daf09..11c5d758454 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -63,14 +63,6 @@ __brick_any_of(const _RandomAccessIterator __first, 
const _RandomAccessIterator return __unseq_backend::__simd_or(__first, __last - __first, __pred); }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred, - _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_any_of(__first, __last, __pred, __is_vector); -} - template bool __pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) noexcept @@ -80,22 +72,6 @@ __pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIte return __internal::__brick_any_of(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Pred __pred, - _IsVector __is_vector, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - - return __internal::__except_handler([&]() { - return __internal::__parallel_or(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__brick_any_of(__i, __j, __pred, __is_vector); - }); - }); -} - template bool __pattern_any_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -168,15 +144,6 @@ __pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIter __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - _IsVector 
__is_vector, - /*parallel=*/::std::false_type) noexcept -{ - __internal::__brick_walk1(__first, __last, __f, __is_vector); -} - template void __pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, @@ -221,48 +188,6 @@ __pattern_replace_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __f oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred)); } -//template -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __internal::__except_handler([&]() { - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__f, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __internal::__brick_walk1(__i, __j, __f, __is_vector); - }); - }); -} - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - !__is_random_access_iterator_v<_ForwardIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, _IsVector, - /*parallel=*/::std::true_type) -{ - typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - auto __func = [&__f](_ReferenceType arg) { __f(arg); }; - __internal::__except_handler([&]() { - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); - }); -} - -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Brick __brick, - /*parallel=*/::std::false_type) noexcept -{ - constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); - __brick(__first, __last, __is_vector); -} - template void __pattern_walk_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, @@ -273,25 +198,6 @@ __pattern_walk_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, __brick(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Brick __brick, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(); - __internal::__except_handler([&]() { - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __brick(__i, __j, __is_vector); - }); - }); -} - template void __pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -325,14 +231,6 @@ __brick_walk1_n(_RandomAccessIterator __first, _DifferenceType __n, _Function __ return __unseq_backend::__simd_walk_1(__first, __n, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f, 
_IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_walk1_n(__first, __n, __f, __is_vector); -} - template _ForwardIterator __pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) noexcept @@ -342,20 +240,6 @@ __pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, return __internal::__brick_walk1_n(__first, __n, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Function __f, - _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - - oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, - __first + __n, __f); - return __first + __n; -} - template _RandomAccessIterator __pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, @@ -366,15 +250,6 @@ __pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _R return __first + __n; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick, - /*is_parallel=*/::std::false_type) noexcept -{ - constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(); - return __brick(__first, __n, __is_vector); -} - template _ForwardIterator __pattern_walk_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick) noexcept @@ -384,25 +259,6 @@ __pattern_walk_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first return __brick(__first, 
__n, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Brick __brick, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - constexpr auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(); - return __internal::__except_handler([&]() { - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, - [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __brick(__i, __j - __i, __is_vector); - }); - return __first + __n; - }); -} - template _RandomAccessIterator __pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, @@ -460,14 +316,6 @@ __brick_walk2_n(_RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterato return __unseq_backend::__simd_walk_2(__first1, __n, __first2, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _Function __f, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_walk2(__first1, __last1, __first2, __f, __is_vector); -} - template _ForwardIterator2 __pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -573,15 +421,6 @@ __pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardItera }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> 
-__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f, - _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_walk2_n(__first1, __n, __first2, __f, __is_vector); -} - template _ForwardIterator2 @@ -593,19 +432,6 @@ __pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __ return __internal::__brick_walk2_n(__first1, __n, __first2, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - - return __internal::__pattern_walk2(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __first1 + __n, __first2, __f); -} - template _RandomAccessIterator2 @@ -616,16 +442,6 @@ __pattern_walk2_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _R __first2, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::false_type) noexcept -{ - constexpr auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - return __brick(__first1, __last1, __first2, __is_vector); -} - template _ForwardIterator2 __pattern_walk2_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -636,30 +452,6 @@ __pattern_walk2_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first 
return __brick(__first1, __last1, __first2, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - constexpr auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - - return __except_handler([&]() { - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __brick(__i, __j, __first2 + (__i - __first1), __is_vector); - }); - return __first2 + (__last1 - __first1); - }); -} - template _RandomAccessIterator2 @@ -737,28 +529,6 @@ __pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&& __exec, _Forwar }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - constexpr auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, 
_RandomAccessIterator2>(); - - return __except_handler([&]() { - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __brick(__i, __j - __i, __first2 + (__i - __first1), __is_vector); - }); - return __first2 + __n; - }); -} - template _RandomAccessIterator2 @@ -777,16 +547,6 @@ __pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Brick __brick, /*parallel=*/::std::false_type) noexcept -{ - constexpr auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - return __brick(__first1, __n, __first2, __is_vector); -} - template _ForwardIterator2 @@ -821,16 +581,6 @@ __brick_walk3(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ return __unseq_backend::__simd_walk_3(__first1, __last1 - __first1, __first2, __first3, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, __is_vector); -} - template _ForwardIterator3 @@ -955,17 +705,6 @@ __pattern_walk3(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardItera // transform_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, 
_ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, _IsVector __is_vector, - _IsParallel __is_parallel) noexcept -{ - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __func, __is_vector, - __is_parallel); -} - template _ForwardIterator2 @@ -977,17 +716,6 @@ __pattern_walk2_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIter return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __func); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - _IsVector __is_vector, _IsParallel __is_parallel) noexcept -{ - return __pattern_walk3(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __first3, __func, - __is_vector, __is_parallel); -} - template _ForwardIterator3 @@ -1025,16 +753,6 @@ __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ .first == __last1; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */ - ::std::false_type) noexcept -{ - return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, __is_vector); -} - template bool __pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -1045,29 +763,6 @@ __pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIt return __internal::__brick_equal(__first1, __last1, __first2, 
__last2, __p, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _BinaryPredicate __p, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - - if (__last1 - __first1 != __last2 - __first2) - return false; - - return __internal::__except_handler([&]() { - return !__internal::__parallel_or( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - __p, __is_vector); - }); - }); -} - template bool @@ -1109,15 +804,6 @@ __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ .first == __last1; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_equal(__first1, __last1, __first2, __p, __is_vector); -} - template bool __pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -1128,25 +814,6 @@ __pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIt return __internal::__brick_equal(__first1, __last1, __first2, __p, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> 
-__pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _BinaryPredicate __p, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - - return __internal::__except_handler([&]() { - return !__internal::__parallel_or( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector); - }); - }); -} - template bool @@ -1327,16 +994,6 @@ __brick_find_end(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, return __find_subrange(__first, __last, __last, __s_first, __s_last, __pred, false, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, - _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, __is_vector); -} - template _ForwardIterator1 __pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, @@ -1347,38 +1004,6 @@ __pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _Forward return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - 
_RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - - if (__last - __first == __s_last - __s_first) - { - const bool __res = __internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), - __first, __last, __s_first, __pred); - return __res ? __first : __last; - } - else - { - return __internal::__except_handler([&]() { - return __internal::__parallel_find( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, - _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, - __is_vector); - }, - ::std::false_type{}); - }); - } -} - template _RandomAccessIterator1 @@ -1425,16 +1050,6 @@ __brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _Forw return __unseq_backend::__simd_find_first_of(__first, __last, __s_first, __s_last, __pred); } -template -_ForwardIterator1 -__pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, __is_vector); -} - template _ForwardIterator1 __pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, @@ -1446,26 +1061,6 @@ __pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _Fo typename _Tag::__is_vector{}); } -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - - return __internal::__except_handler([&]() { - return __internal::__parallel_find( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__s_first, __s_last, &__pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { - return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, __is_vector); - }, - ::std::true_type{}); - }); -} - template _ForwardIterator1 @@ -1504,16 +1099,6 @@ __brick_search(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIter return __internal::__find_subrange(__first, __last, __last, __s_first, __s_last, __pred, true, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_search(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, - _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, __is_vector); -} - template _ForwardIterator1 __pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, @@ -1524,39 +1109,6 @@ __pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIt return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); } -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - // TODO is it correct that we check _RandomAccessIterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - - if (__last - __first == __s_last - __s_first) - { - const bool __res = __internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), - __first, __last, __s_first, __pred); - return __res ? __first : __last; - } - else - { - return __internal::__except_handler([&]() { - return __internal::__parallel_find( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, __is_vector](_RandomAccessIterator1 __i, - _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, - __is_vector); - }, - ::std::true_type{}); - }); - } -} - template _RandomAccessIterator1 @@ -1603,16 +1155,6 @@ __brick_search_n(_RandomAccessIterator __first, _RandomAccessIterator __last, _S return __internal::__find_subrange(__first, __last, __last, __count, __value, __pred, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_search_n(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, - const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_search_n(__first, __last, __count, __value, __pred, __is_vector); -} - template _ForwardIterator __pattern_search_n(_Tag, 
_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, @@ -1623,53 +1165,22 @@ __pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardI return __internal::__brick_search_n(__first, __last, __count, __value, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - if (static_cast<_Size>(__last - __first) == __count) { - const bool __result = !__internal::__pattern_any_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }, __is_vector, - /*is_parallel*/ ::std::true_type()); + const bool __result = + !__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }); return __result ? 
__first : __last; } else { - return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __is_vector]() { - return __internal::__parallel_find( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __count, &__value, __pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, __is_vector); - }, - ::std::true_type{}); - }); - } -} - -template -_RandomAccessIterator -__pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, - _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) -{ - if (static_cast<_Size>(__last - __first) == __count) - { - const bool __result = - !__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }); - return __result ? 
__first : __last; - } - else - { - return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __tag]() { + return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __tag]() { return __internal::__parallel_find( __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __count, &__value, __pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { @@ -1947,14 +1458,6 @@ __brick_partition_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, - _UnaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); -} - template _OutputIterator __pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, @@ -1965,46 +1468,6 @@ __pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIt return __internal::__brick_copy_if(__first, __last, __result, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __result, _UnaryPredicate __pred, _IsVector __is_vector, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; - const _DifferenceType __n = 
__last - __first; - if (_DifferenceType(1) < __n) - { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); - return __internal::__except_handler([&__exec, __n, __first, __result, __is_vector, __pred, &__mask_buf]() { - bool* __mask = __mask_buf.get(); - _DifferenceType __m{}; - __par_backend::__parallel_strict_scan( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), - [=](_DifferenceType __i, _DifferenceType __len) { // Reduce - return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector) - .first; - }, - ::std::plus<_DifferenceType>(), // Combine - [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan - __internal::__brick_copy_by_mask( - __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, - [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, __is_vector); - }, - [&__m](_DifferenceType __total) { __m = __total; }); - return __result + __m; - }); - } - // trivial sequence - use serial algorithm - return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); -} - template _RandomAccessIterator2 @@ -2061,15 +1524,6 @@ __brick_count(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pr return ::std::count_if(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, - /* is_parallel */ ::std::false_type, _IsVector __is_vector) noexcept -{ - return __internal::__brick_count(__first, __last, __pred, __is_vector); -} - template typename ::std::iterator_traits<_ForwardIterator>::difference_type __pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) 
noexcept @@ -2079,32 +1533,6 @@ __pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIter return __internal::__brick_count(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> -__pattern_count(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Predicate __pred, - /* is_parallel */ ::std::true_type, _IsVector __is_vector) -{ - typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; - - //trivial pre-checks - if (__first == __last) - return _SizeType(0); - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - return __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), - [__pred, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) - -> _SizeType { return __value + __internal::__brick_count(__begin, __end, __pred, __is_vector); }, - ::std::plus<_SizeType>()); - }); -} - template typename ::std::iterator_traits<_RandomAccessIterator>::difference_type __pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -2149,14 +1577,6 @@ __brick_unique(_RandomAccessIterator __first, _RandomAccessIterator __last, _Bin return ::std::unique(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_unique(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return 
__internal::__brick_unique(__first, __last, __pred, __is_vector); -} - template _ForwardIterator __pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -2251,32 +1671,6 @@ __remove_elements(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _F }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_unique(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _BinaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; - - if (__first == __last) - { - return __last; - } - if (__first + 1 == __last || __first + 2 == __last) - { - // Trivial sequence - use serial algorithm - return __internal::__brick_unique(__first, __last, __pred, __is_vector); - } - return __internal::__remove_elements( - ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, - [&__pred, __is_vector](bool* __b, bool* __e, _RandomAccessIterator __it) { - __internal::__brick_walk3( - __b, __e, __it - 1, __it, - [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, __is_vector); - }, - __is_vector); -} - template _RandomAccessIterator __pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -2328,15 +1722,6 @@ __brick_unique_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __las #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, - _BinaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); -} - template _OutputIterator 
__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -2369,60 +1754,6 @@ __brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, return __unseq_backend::__simd_calc_mask_2(__first, __last - __first, __mask, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __result, _BinaryPredicate __pred, _IsVector __is_vector, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; - const _DifferenceType __n = __last - __first; - if (_DifferenceType(2) < __n) - { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); - if (_DifferenceType(2) < __n) - { - return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __is_vector, &__mask_buf]() { - bool* __mask = __mask_buf.get(); - _DifferenceType __m{}; - __par_backend::__parallel_strict_scan( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), - [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce - _DifferenceType __extra = 0; - if (__i == 0) - { - // Special boundary case - __mask[__i] = true; - if (--__len == 0) - return 1; - ++__i; - ++__extra; - } - return __internal::__brick_calc_mask_2<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector) + - __extra; - }, - ::std::plus<_DifferenceType>(), // Combine - [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan - // Phase 2 is same as for 
__pattern_copy_if - __internal::__brick_copy_by_mask( - __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, - [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, __is_vector); - }, - [&__m](_DifferenceType __total) { __m = __total; }); - return __result + __m; - }); - } - } - // trivial sequence - use serial algorithm - return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); -} - template _RandomAccessIterator2 @@ -2528,15 +1859,6 @@ __brick_reverse(_RandomAccessIterator __first, _RandomAccessIterator __last, _Ra }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _IsVector _is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - __internal::__brick_reverse(__first, __last, _is_vector); -} - template void __pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last) noexcept @@ -2546,22 +1868,6 @@ __pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _Bid __internal::__brick_reverse(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, - [__is_vector, __first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { - __internal::__brick_reverse(__inner_first, 
__inner_last, __last - (__inner_first - __first), __is_vector); - }); -} - template void __pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -2600,14 +1906,6 @@ __brick_reverse_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __la __d_first, [](_ReferenceType1 __x, _ReferenceType2 __y) { __y = __x; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _OutputIterator __d_first, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_reverse_copy(__first, __last, __d_first, __is_vector); -} - template _OutputIterator __pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, @@ -2618,26 +1916,6 @@ __pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, return __internal::__brick_reverse_copy(__first, __last, __d_first, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_reverse_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __len = __last - __first; - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__is_vector, __first, __len, __d_first](_RandomAccessIterator1 __inner_first, - _RandomAccessIterator1 __inner_last) { - __internal::__brick_reverse_copy(__inner_first, __inner_last, - __d_first + 
(__len - (__inner_last - __first)), - __is_vector); - }); - return __d_first + __len; -} - template _RandomAccessIterator2 __pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, @@ -2707,14 +1985,6 @@ __brick_rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _R return __ret; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_rotate(__first, __middle, __last, __is_vector); -} - template _ForwardIterator __pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, @@ -2725,73 +1995,6 @@ __pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIte return __internal::__brick_rotate(__first, __middle, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; - auto __n = __last - __first; - auto __m = __middle - __first; - if (__m <= __n / 2) - { - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n - __m); - return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { - _Tp* __result = __buf.get(); - __par_backend::__parallel_for( - __backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__middle, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __middle), __is_vector); - }); - - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__last, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __b + (__last - __middle), __is_vector); - }); - - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, - __result + (__n - __m), [__first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move_destroy<_ExecutionPolicy>{}( - __b, __e, __first + (__b - __result), __is_vector); - }); - - return __first + (__last - __middle); - }); - } - else - { - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m); - return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { - _Tp* __result = __buf.get(); - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__first, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __first), __is_vector); - }); - - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__first, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __first + (__b - __middle), __is_vector); - }); - - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, - __result + __m, - [__n, __m, __first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move_destroy<_ExecutionPolicy>{}( - __b, __e, __first + ((__n - __m) + 
(__b - __result)), __is_vector); - }); - - return __first + (__last - __middle); - }); - } -} - template _RandomAccessIterator __pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -2878,16 +2081,6 @@ __brick_rotate_copy(_ExecutionPolicy&&, _RandomAccessIterator1 __first, _RandomA return __internal::__brick_copy<_ExecutionPolicy>{}(__first, __middle, __res, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, - _ForwardIterator __last, _OutputIterator __result, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_rotate_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, - __result, __is_vector); -} - template _OutputIterator __pattern_rotate_copy(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, @@ -2899,41 +2092,6 @@ __pattern_rotate_copy(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, __result, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, - _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __last, __middle, __result, __is_vector](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { - 
__internal::__brick_copy<_ExecutionPolicy> __copy{}; - if (__b > __middle) - { - __copy(__b, __e, __result + (__b - __middle), __is_vector); - } - else - { - _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); - if (__e < __middle) - { - __copy(__b, __e, __new_result, __is_vector); - } - else - { - __copy(__b, __middle, __new_result, __is_vector); - __copy(__middle, __e, __result, __is_vector); - } - } - }); - return __result + (__last - __first); -} - template _RandomAccessIterator2 __pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, @@ -3005,14 +2163,6 @@ __brick_is_partitioned(_RandomAccessIterator __first, _RandomAccessIterator __la } } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_is_partitioned(__first, __last, __pred, __is_vector); -} - template bool __pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -3023,19 +2173,19 @@ __pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo return __internal::__brick_is_partitioned(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { - constexpr auto __dispatch_tag = - 
oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - //trivial pre-checks if (__first == __last) return true; + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + return __internal::__except_handler([&]() { // State of current range: // broken - current range is not partitioned by pred @@ -3070,8 +2220,8 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs _ReduceType __result = __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, - [&__pred, __combine, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, - _ReduceType __value) -> _ReduceType { + [&__pred, __combine](_RandomAccessIterator __i, _RandomAccessIterator __j, + _ReduceType __value) -> _ReduceType { if (__value.__val == __broken) return _ReduceType{__broken, __i}; @@ -3081,112 +2231,11 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { // find first element that don't satisfy pred _RandomAccessIterator __x = - __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), __is_vector); + __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), _IsVector{}); if (__x != __j) { // find first element after "x" that satisfy pred - _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, __is_vector); - // if it was found then range isn't partitioned by pred - if (__y != __j) - return _ReduceType{__broken, __i}; - - __res = _ReduceType{__true_false, __i}; - } - else - __res = _ReduceType{__all_true, __i}; - } - else - { // if first element doesn't satisfy pred - // then we should find the first element that satisfy pred. 
- // If we found it then range isn't partitioned by pred - if (__internal::__brick_find_if(__i + 1, __j, __pred, __is_vector) != __j) - return _ReduceType{__broken, __i}; - - __res = _ReduceType{__all_false, __i}; - } - // if we have value from left range then we should calculate the result - return (__value.__val == __not_init) ? __res : __combine(__value, __res); - }, - - [__combine](_ReduceType __val1, _ReduceType __val2) -> _ReduceType { - if (__val1.__val == __not_init) - return __val2; - if (__val2.__val == __not_init) - return __val1; - assert(__val1.__val != __not_init && __val2.__val != __not_init); - - if (__val1.__val == __broken || __val2.__val == __broken) - return _ReduceType{__broken, __val1.__pos}; - // calculate the result for new big range - return __combine(__val1, __val2); - }); - return __result.__val != __broken; - }); -} - -template -bool -__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, - _RandomAccessIterator __last, _UnaryPredicate __pred) -{ - //trivial pre-checks - if (__first == __last) - return true; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - // State of current range: - // broken - current range is not partitioned by pred - // all_true - all elements in current range satisfy pred - // all_false - all elements in current range don't satisfy pred - // true_false - elements satisfy pred are placed before elements that don't satisfy pred - enum _ReduceRes - { - __not_init = -1, - __broken, - __all_true, - __all_false, - __true_false - }; - // Array with states that we'll have when state from the left branch is merged with state from the right branch. 
- // State is calculated by formula: new_state = table[left_state * 4 + right_state] - const _ReduceRes __table[] = {__broken, __broken, __broken, __broken, __broken, __all_true, - __true_false, __true_false, __broken, __broken, __all_false, __broken, - __broken, __broken, __true_false, __broken}; - struct _ReduceType - { - _ReduceRes __val; - _RandomAccessIterator __pos; - }; - //a commutative combiner - auto __combine = [&__table](_ReduceType __x, _ReduceType __y) { - return __x.__pos > __y.__pos ? _ReduceType{__table[__y.__val * 4 + __x.__val], __y.__pos} - : _ReduceType{__table[__x.__val * 4 + __y.__val], __x.__pos}; - }; - - const _ReduceType __identity{__not_init, __last}; - - _ReduceType __result = __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, - [&__pred, __combine](_RandomAccessIterator __i, _RandomAccessIterator __j, - _ReduceType __value) -> _ReduceType { - if (__value.__val == __broken) - return _ReduceType{__broken, __i}; - - _ReduceType __res{__not_init, __i}; - // if first element satisfy pred - if (__pred(*__i)) - { - // find first element that don't satisfy pred - _RandomAccessIterator __x = - __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), _IsVector{}); - if (__x != __j) - { - // find first element after "x" that satisfy pred - _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, _IsVector{}); + _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, _IsVector{}); // if it was found then range isn't partitioned by pred if (__y != __j) return _ReduceType{__broken, __i}; @@ -3246,14 +2295,6 @@ __brick_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _ return ::std::partition(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_partition(_ExecutionPolicy&&, _ForwardIterator __first, 
_ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_partition(__first, __last, __pred, __is_vector); -} - template _ForwardIterator __pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -3264,77 +2305,6 @@ __pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forward return __internal::__brick_partition(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - // partitioned range: elements before pivot satisfy pred (true part), - // elements after pivot don't satisfy pred (false part) - struct _PartitionRange - { - _RandomAccessIterator __begin; - _RandomAccessIterator __pivot; - _RandomAccessIterator __end; - }; - - return __internal::__except_handler([&]() { - _PartitionRange __init{__last, __last, __last}; - - // lambda for merging two partitioned ranges to one partitioned range - auto __reductor = [&__exec, __is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { - auto __size1 = __val1.__end - __val1.__pivot; - auto __size2 = __val2.__pivot - __val2.__begin; - auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); - - // if all elements in left range satisfy pred then we can move new pivot to pivot of right range - if (__val1.__end == __val1.__pivot) - { - return {__new_begin, __val2.__pivot, __val2.__end}; - } - // if true part of right range greater than false 
part of left range - // then we should swap the false part of left range and last part of true part of right range - else if (__size2 > __size1) - { - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, - [__val1, __val2, __size1, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), - __is_vector); - }); - return {__new_begin, __val2.__pivot - __size1, __val2.__end}; - } - // else we should swap the first part of false part of left range and true part of right range - else - { - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, - [__val1, __val2, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), __is_vector); - }); - return {__new_begin, __val1.__pivot + __size2, __val2.__end}; - } - }; - - _PartitionRange __result = __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, - [__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, - _PartitionRange __value) -> _PartitionRange { - //1. serial partition - _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, __is_vector); - - // 2. 
merging of two ranges (left and right respectively) - return __reductor(__value, {__i, __pivot, __j}); - }, - __reductor); - return __result.__pivot; - }); -} - template _RandomAccessIterator __pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -3437,15 +2407,6 @@ __brick_stable_partition(_RandomAccessIterator __first, _RandomAccessIterator __ return ::std::stable_partition(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, - /*is_parallelization=*/::std::false_type) noexcept -{ - return __internal::__brick_stable_partition(__first, __last, __pred, __is_vector); -} - template _BidirectionalIterator __pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, @@ -3456,62 +2417,6 @@ __pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __fi return __internal::__brick_stable_partition(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, - /*is_parallelization=*/::std::true_type) -{ - // partitioned range: elements before pivot satisfy pred (true part), - // elements after pivot don't satisfy pred (false part) - struct _PartitionRange - { - _RandomAccessIterator __begin; - _RandomAccessIterator __pivot; - _RandomAccessIterator __end; - }; - - return __internal::__except_handler([&]() { - _PartitionRange __init{__last, __last, __last}; - - // lambda for merging two partitioned ranges to one partitioned range - auto 
__reductor = [__is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { - auto __size1 = __val1.__end - __val1.__pivot; - auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); - - // if all elements in left range satisfy pred then we can move new pivot to pivot of right range - if (__val1.__end == __val1.__pivot) - { - return {__new_begin, __val2.__pivot, __val2.__end}; - } - // if true part of right range greater than false part of left range - // then we should swap the false part of left range and last part of true part of right range - else - { - __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, __is_vector); - return {__new_begin, __val2.__pivot - __size1, __val2.__end}; - } - }; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - _PartitionRange __result = __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, - [&__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, - _PartitionRange __value) -> _PartitionRange { - //1. serial stable_partition - _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, __is_vector); - - // 2. 
merging of two ranges (left and right respectively) - return __reductor(__value, {__i, __pivot, __j}); - }, - __reductor); - return __result.__pivot; - }); -} - template _RandomAccessIterator __pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -3593,17 +2498,6 @@ __brick_partition_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __ #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallelization=*/::std::false_type) noexcept -{ - return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); -} - template ::std::pair<_OutputIterator1, _OutputIterator2> @@ -3616,52 +2510,6 @@ __pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator2, _RandomAccessIterator3>> -__pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __out_true, _RandomAccessIterator3 __out_false, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallelization=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator3>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; - typedef ::std::pair<_DifferenceType, _DifferenceType> _ReturnType; - const _DifferenceType 
__n = __last - __first; - if (_DifferenceType(1) < __n) - { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); - return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __is_vector, __pred, - &__mask_buf]() { - bool* __mask = __mask_buf.get(); - _ReturnType __m{}; - __par_backend::__parallel_strict_scan( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, - ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), - [=](_DifferenceType __i, _DifferenceType __len) { // Reduce - return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector); - }, - [](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType { - return ::std::make_pair(__x.first + __y.first, __x.second + __y.second); - }, // Combine - [=](_DifferenceType __i, _DifferenceType __len, _ReturnType __initial) { // Scan - __internal::__brick_partition_by_mask(__first + __i, __first + (__i + __len), - __out_true + __initial.first, __out_false + __initial.second, - __mask + __i, __is_vector); - }, - [&__m](_ReturnType __total) { __m = __total; }); - return ::std::make_pair(__out_true + __m.first, __out_false + __m.second); - }); - } - // trivial sequence - use serial algorithm - return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); -} - template ::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> @@ -3707,15 +2555,6 @@ __pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type, _IsMoveConstructible) noexcept -{ - ::std::sort(__first, __last, __comp); -} - 
template void __pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, @@ -3726,25 +2565,6 @@ __pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomA ::std::sort(__first, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - ::std::sort(__first, __last, __comp); - }, - __last - __first); - }); -} - template void __pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -3767,14 +2587,6 @@ __pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcce // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type) noexcept -{ - ::std::stable_sort(__first, __last, __comp); -} - template void __pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -3785,25 +2597,6 @@ __pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _ 
::std::stable_sort(__first, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - ::std::stable_sort(__first, __last, __comp); - }, - __last - __first); - }); -} - template void __pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -3825,22 +2618,6 @@ __pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran // sort_by_key //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::false_type) noexcept -{ - auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); - auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = [__comp](const auto& __a, const auto& __b) { - return __comp(::std::get<0>(__a), ::std::get<0>(__b)); - }; - - ::std::sort(__beg, __end, __cmp_f); -} - template void @@ -3859,35 +2636,6 @@ __pattern_sort_by_key(_Tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __ ::std::sort(__beg, __end, __cmp_f); } -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::true_type) -{ - static_assert( - ::std::is_move_constructible_v::value_type> && - ::std::is_move_constructible_v::value_type>, - "The keys and values should be move constructible in case of parallel execution."); - - auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); - auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = [__comp](const auto& __a, const auto& __b) { - return __comp(::std::get<0>(__a), ::std::get<0>(__b)); - }; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, - [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); - }); -} - template void @@ -3918,15 +2666,6 @@ __pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept -{ - ::std::partial_sort(__first, __middle, __last, __comp); -} - template void __pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, @@ -3937,32 +2676,6 
@@ __pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, ::std::partial_sort(__first, __middle, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector, /*is_parallel=*/::std::true_type) -{ - const auto __n = __middle - __first; - if (__n == 0) - return; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __except_handler([&]() { - __par_backend::__parallel_stable_sort( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { - if (__n < __end - __begin) - ::std::partial_sort(__begin, __begin + __n, __end, __comp); - else - ::std::sort(__begin, __end, __comp); - }, - __n); - }); -} - template void __pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -3991,15 +2704,6 @@ __pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ra // partial_sort_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept -{ - return ::std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); -} - template _RandomAccessIterator __pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator 
__last, @@ -4010,85 +2714,6 @@ __pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, return ::std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _RandomAccessIterator2 __d_last, _Compare __comp, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - if (__last == __first || __d_last == __d_first) - { - return __d_first; - } - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __n1 = __last - __first; - auto __n2 = __d_last - __d_first; - return __internal::__except_handler([&]() { - if (__n2 >= __n1) - { - __par_backend::__parallel_stable_sort( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, - [__first, __d_first, __is_vector](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, - _Compare __comp) { - _RandomAccessIterator1 __i1 = __first + (__i - __d_first); - _RandomAccessIterator1 __j1 = __first + (__j - __d_first); - - // 1. Copy elements from input to output - __brick_copy<_ExecutionPolicy>{}(__i1, __j1, __i, __is_vector); - // 2. 
Sort elements in output sequence - ::std::sort(__i, __j, __comp); - }, - __n1); - return __d_first + __n1; - } - else - { - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::value_type _T1; - typedef typename ::std::iterator_traits<_RandomAccessIterator2>::value_type _T2; - __par_backend::__buffer<_ExecutionPolicy, _T1> __buf(__n1); - _T1* __r = __buf.get(); - - __par_backend::__parallel_stable_sort( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, - [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { - _RandomAccessIterator1 __it = __first + (__i - __r); - - // 1. Copy elements from input to raw memory - for (_T1* __k = __i; __k != __j; ++__k, ++__it) - { - ::new (__k) _T2(*__it); - } - - // 2. Sort elements in temporary buffer - if (__n2 < __j - __i) - ::std::partial_sort(__i, __i + __n2, __j, __comp); - else - ::std::sort(__i, __j, __comp); - }, - __n2); - - // 3. Move elements from temporary buffer to output - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, - [__r, __d_first, __is_vector](_T1* __i, _T1* __j) { - __brick_move_destroy<_ExecutionPolicy>{}( - __i, __j, __d_first + (__i - __r), __is_vector); - }); - - if constexpr (!::std::is_trivially_destructible_v<_T1>) - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, __r + __n1, - [__is_vector](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, __is_vector); }); - - return __d_first + __n2; - } - }); -} - template _RandomAccessIterator2 @@ -4183,14 +2808,6 @@ __brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _Binary return ::std::adjacent_find(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred, - /* is_parallel */ 
::std::false_type, _IsVector __is_vector, _Semantic) noexcept -{ - return __internal::__brick_adjacent_find(__first, __last, __pred, __is_vector, _Semantic::value); -} - template _ForwardIterator __pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -4201,55 +2818,6 @@ __pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _For return __internal::__brick_adjacent_find(__first, __last, __pred, typename _Tag::__is_vector{}, _Semantic::value); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _BinaryPredicate __pred, /* is_parallel */ ::std::true_type, _IsVector __is_vector, - _Semantic __or_semantic) -{ - if (__last - __first < 2) - return __last; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - return __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, - [__last, __pred, __is_vector, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, - _RandomAccessIterator __value) -> _RandomAccessIterator { - // TODO: investigate performance benefits from the use of shared variable for the result, - // checking (compare_and_swap idiom) its __value at __first. 
- if (__or_semantic && __value < __last) - { //found - return __value; - } - - if (__value > __begin) - { - // modify __end to check the predicate on the boundary __values; - // TODO: to use a custom range with boundaries overlapping - // TODO: investigate what if we remove "if" below and run algorithm on range [__first, __last-1) - // then check the pair [__last-1, __last) - if (__end != __last) - ++__end; - - //correct the global result iterator if the "brick" returns a local "__last" - const _RandomAccessIterator __res = - __internal::__brick_adjacent_find(__begin, __end, __pred, __is_vector, __or_semantic); - if (__res < __end) - __value = __res; - } - return __value; - }, - [](_RandomAccessIterator __x, _RandomAccessIterator __y) -> _RandomAccessIterator { - return __x < __y ? __x : __y; - } //reduce a __value - ); - }); -} - template _RandomAccessIterator __pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -4302,15 +2870,6 @@ __pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, - _RandomAccessIterator __last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept -{ - ::std::nth_element(__first, __nth, __last, __comp); -} - template void __pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, @@ -4321,52 +2880,6 @@ __pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _ ::std::nth_element(__first, __nth, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, 
_RandomAccessIterator __nth, - _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - if (__first == __last || __nth == __last) - { - return; - } - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - - using ::std::iter_swap; - typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; - _RandomAccessIterator __x; - do - { - __x = __internal::__pattern_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first + 1, - __last, - [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }); - --__x; - if (__x != __first) - { - iter_swap(__first, __x); - } - // if x > nth then our new range for partition is [first, x) - if (__x - __nth > 0) - { - __last = __x; - } - // if x < nth then our new range for partition is [x, last) - else if (__x - __nth < 0) - { - // if *x == *nth then we start the new partition at the next index where *x != *nth - while (!__comp(*__nth, *__x) && !__comp(*__x, *__nth) && __x - __nth < 0) - { - ++__x; - } - iter_swap(__nth, __x); - __first = __x; - } - } while (__x != __nth); -} - template void __pattern_nth_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -4433,41 +2946,13 @@ struct __brick_fill<_Tp, _ExecutionPolicy, oneapi::dpl::__internal::__enable_if_ } }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_fill(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept -{ - __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__first, __last, __is_vector); -} - template void -__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) noexcept -{ - static_assert(__is_backend_tag_v<_Tag>); - - 
__internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__first, __last, typename _Tag::__is_vector{}); -} - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_fill(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - const _Tp& __value, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&__exec, __first, __last, &__value, __is_vector]() { - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { - __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__begin, __end, __is_vector); - }); - return __last; - }); +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__first, __last, typename _Tag::__is_vector{}); } template @@ -4510,14 +2995,6 @@ struct __brick_fill_n<_Tp, _ExecutionPolicy, } }; -template -_OutputIterator -__pattern_fill_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept -{ - return __internal::__brick_fill_n<_Tp, _ExecutionPolicy>{__value}(__first, __count, __is_vector); -} - template _OutputIterator __pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value) noexcept @@ -4527,18 +3004,6 @@ __pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __coun return __internal::__brick_fill_n<_Tp, _ExecutionPolicy>{__value}(__first, 
__count, typename _Tag::__is_vector{}); } -template -_RandomAccessIterator -__pattern_fill_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, const _Tp& __value, - /*is_parallel=*/::std::true_type, _IsVector /*__is_vector*/) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - - return __internal::__pattern_fill(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, - __first + __count, __value); -} - template _RandomAccessIterator __pattern_fill_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -4567,14 +3032,6 @@ __brick_generate(_ForwardIterator __first, _ForwardIterator __last, _Generator _ ::std::generate(__first, __last, __g); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_generate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept -{ - __internal::__brick_generate(__first, __last, __g, __is_vector); -} - template void __pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) noexcept @@ -4584,25 +3041,6 @@ __pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardI __internal::__brick_generate(__first, __last, __g, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_generate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Generator __g, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return 
__internal::__except_handler([&]() { - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__g, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { - __internal::__brick_generate(__begin, __end, __g, __is_vector); - }); - return __last; - }); -} - template _RandomAccessIterator __pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -4634,14 +3072,6 @@ __brick_generate_n(OutputIterator __first, Size __count, _Generator __g, /* is_v return ::std::generate_n(__first, __count, __g); } -template -_OutputIterator -__pattern_generate_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept -{ - return __internal::__brick_generate_n(__first, __count, __g, __is_vector); -} - template _OutputIterator __pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g) noexcept @@ -4651,17 +3081,6 @@ __pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __ return __internal::__brick_generate_n(__first, __count, __g, typename _Tag::__is_vector{}); } -template -_RandomAccessIterator -__pattern_generate_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, _Generator __g, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) -{ - static_assert(__is_random_access_iterator_v<_RandomAccessIterator>, - "Pattern-brick error. 
Should be a random access iterator."); - return __internal::__pattern_generate(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __g, - ::std::true_type(), __is_vector); -} - template _RandomAccessIterator __pattern_generate_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -4697,14 +3116,6 @@ __brick_remove_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _ #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel*/ ::std::false_type) noexcept -{ - return __internal::__brick_remove_if(__first, __last, __pred, __is_vector); -} - template _ForwardIterator __pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -4715,28 +3126,6 @@ __pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forward return __internal::__brick_remove_if(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_remove_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel*/ ::std::true_type) -{ - typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; - - if (__first == __last || __first + 1 == __last) - { - // Trivial sequence - use serial algorithm - return __internal::__brick_remove_if(__first, __last, __pred, __is_vector); - } - - return __internal::__remove_elements( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__pred, __is_vector](bool* __b, bool* __e, _RandomAccessIterator __it) { - __internal::__brick_walk2( - __b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) 
{ __x = !__pred(__y); }, __is_vector); - }, - __is_vector); -} - template _RandomAccessIterator __pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -4783,16 +3172,6 @@ __brick_merge(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ return ::std::merge(__first1, __last1, __first2, __last2, __d_first, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, __is_vector); -} - template _OutputIterator @@ -4806,28 +3185,6 @@ __pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIt typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __d_first, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator3>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __par_backend::__parallel_merge( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, - __comp, - [__is_vector](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, - _RandomAccessIterator2 __l2, 
_RandomAccessIterator3 __f3, _Compare __comp) { - return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, __is_vector); - }); - return __d_first + (__last1 - __first1) + (__last2 - __first2); -} - template _RandomAccessIterator3 @@ -4866,15 +3223,6 @@ __brick_inplace_merge(_RandomAccessIterator __first, _RandomAccessIterator __mid ::std::inplace_merge(__first, __middle, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, - _BidirectionalIterator __last, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept -{ - __internal::__brick_inplace_merge(__first, __middle, __last, __comp, __is_vector); -} - template void __pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, @@ -4885,54 +3233,6 @@ __pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first __internal::__brick_inplace_merge(__first, __middle, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - if (__first == __last || __first == __middle || __middle == __last) - { - return; - } - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; - auto __n = __last - __first; - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n); - _Tp* __r = __buf.get(); - __internal::__except_handler([&]() { - 
auto __move_values = [](_RandomAccessIterator __x, _Tp* __z) { - if constexpr (::std::is_trivial_v<_Tp>) - *__z = ::std::move(*__x); - else - ::new (::std::addressof(*__z)) _Tp(::std::move(*__x)); - }; - - auto __move_sequences = [](_RandomAccessIterator __first1, _RandomAccessIterator __last1, _Tp* __first2) { - return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector()); - }; - - __par_backend::__parallel_merge( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, - [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, - _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, - _Compare __comp) { - (__utils::__serial_move_merge(__n))(__f1, __l1, __f2, __l2, __f3, __comp, __move_values, __move_values, - __move_sequences, __move_sequences); - return __f3 + (__l1 - __f1) + (__l2 - __f2); - }); - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, - [__r, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), - __is_vector); - }); - }); -} - template void __pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -4982,15 +3282,6 @@ __pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _R // includes //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept -{ - return ::std::includes(__first1, __last1, __first2, __last2, __comp); -} - template bool __pattern_includes(_Tag, _ExecutionPolicy&&, 
_ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -5001,71 +3292,6 @@ __pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Forwar return ::std::includes(__first1, __last1, __first2, __last2, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp, _IsVector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - - if (__first2 == __last2) - return true; - - //optimization; {1} - the first sequence, {2} - the second sequence - //{1} is empty or size_of{2} > size_of{1} - if (__first1 == __last1 || __last2 - __first2 > __last1 - __first1 || - // {1}: [**********] or [**********] - // {2}: [***********] [***********] - __comp(*__first2, *__first1) || __comp(*(__last1 - 1), *(__last2 - 1))) - return false; - - __first1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); - if (__first1 == __last1) - return false; - - if (__last2 - __first2 == 1) - return !__comp(*__first1, *__first2) && !__comp(*__first2, *__first1); - - return __internal::__except_handler([&]() { - return !__internal::__parallel_or( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, - [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { - assert(__j > __i); - //assert(__j - __i > 1); - - //1. 
moving boundaries to "consume" subsequence of equal elements - auto __is_equal_sorted = [&__comp](_RandomAccessIterator2 __a, _RandomAccessIterator2 __b) -> bool { - //enough one call of __comp due to compared couple belongs to one sorted sequience - return !__comp(*__a, *__b); - }; - - //1.1 left bound, case "aaa[aaaxyz...]" - searching "x" - if (__i > __first2 && __is_equal_sorted(__i - 1, __i)) - { - //whole subrange continues to have equal elements - return "no op" - if (__is_equal_sorted(__i, __j - 1)) - return false; - - __i = ::std::upper_bound(__i, __last2, *__i, __comp); - } - - //1.2 right bound, case "[...aaa]aaaxyz" - searching "x" - if (__j < __last2 && __is_equal_sorted(__j - 1, __j)) - __j = ::std::upper_bound(__j, __last2, *__j, __comp); - - //2. testing is __a subsequence of the second range included into the first range - auto __b = ::std::lower_bound(__first1, __last1, *__i, __comp); - - assert(!__comp(*(__last1 - 1), *__b)); - assert(!__comp(*(__j - 1), *__i)); - return !::std::includes(__b, __last1, __i, __j, __comp); - }); - }); -} - template bool @@ -5121,109 +3347,15 @@ __pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ //2. 
testing is __a subsequence of the second range included into the first range auto __b = ::std::lower_bound(__first1, __last1, *__i, __comp); - assert(!__comp(*(__last1 - 1), *__b)); - assert(!__comp(*(__j - 1), *__i)); - return !::std::includes(__b, __last1, __i, __j, __comp); - }); - }); -} - -inline constexpr auto __set_algo_cut_off = 1000; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, - _ForwardIterator2, _OutputIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; - typedef typename ::std::iterator_traits<_OutputIterator>::value_type _T; - - struct _SetRange - { - _DifferenceType __pos, __len, __buf_pos; - bool - empty() const - { - return __len == 0; - } - }; - - const _DifferenceType __n1 = __last1 - __first1; - const _DifferenceType __n2 = __last2 - __first2; - - __par_backend::__buffer<_ExecutionPolicy, _T> __buf(__size_func(__n1, __n2)); - - return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __is_vector, - __comp, __size_func, __set_op, &__buf]() { - auto __tmp_memory = __buf.get(); - _DifferenceType __m{}; - auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan - if (!__s.empty()) - __brick_move_destroy<_ExecutionPolicy>{}(__tmp_memory + __s.__buf_pos, - __tmp_memory + (__s.__buf_pos + __s.__len), - __result + __s.__pos, __is_vector); - }; - __par_backend::__parallel_strict_scan( - __backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, - [=](_DifferenceType __i, _DifferenceType __len) { // Reduce - //[__b; __e) - a subrange of the first sequence, to reduce - _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); - - //try searching for the first element which not equal to *__b - if (__b != __first1) - __b = ::std::upper_bound(__b, __last1, *__b, __comp); - - //try searching for the first element which not equal to *__e - if (__e != __last1) - __e = ::std::upper_bound(__e, __last1, *__e, __comp); - - //check is [__b; __e) empty - if (__e - __b < 1) - { - _ForwardIterator2 __bb = __last2; - if (__b != __last1) - __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); - - const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2)); - return _SetRange{0, 0, __buf_pos}; - } - - //try searching for "corresponding" subrange [__bb; __ee) in the second sequence - _ForwardIterator2 __bb = __first2; - if (__b != __first1) - __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); - - _ForwardIterator2 __ee = __last2; - if (__e != __last1) - __ee = ::std::lower_bound(__bb, __last2, *__e, __comp); - - const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2)); - auto __buffer_b = __tmp_memory + __buf_pos; - auto __res = __set_op(__b, __e, __bb, __ee, __buffer_b, __comp); - - return _SetRange{0, __res - __buffer_b, __buf_pos}; - }, - [](const _SetRange& __a, const _SetRange& __b) { // Combine - if (__b.__buf_pos > __a.__buf_pos || ((__b.__buf_pos == __a.__buf_pos) && !__b.empty())) - return _SetRange{__a.__pos + __a.__len + __b.__pos, __b.__len, __b.__buf_pos}; - return _SetRange{__b.__pos + __b.__len + __a.__pos, __a.__len, __a.__buf_pos}; - }, - __scan, // Scan - [&__m, &__scan](const _SetRange& __total) { // Apex - //final scan - __scan(0, 0, __total); - __m = __total.__pos + __total.__len; + assert(!__comp(*(__last1 - 1), *__b)); + assert(!__comp(*(__j - 1), 
*__i)); + return !::std::includes(__b, __last1, __i, __j, __comp); }); - return __result + __m; }); } +inline constexpr auto __set_algo_cut_off = 1000; + template _OutputIterator @@ -5316,121 +3448,6 @@ __parallel_set_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _F }); } -//a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _SetUnionOp __set_union_op, _IsVector __is_vector) -{ - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, - _ForwardIterator2, _OutputIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - const auto __n1 = __last1 - __first1; - const auto __n2 = __last2 - __first2; - - __brick_copy<_ExecutionPolicy> __copy_range{}; - - // {1} {}: parallel copying just first sequence - if (__n2 == 0) - return __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result, __copy_range); - - // {} {2}: parallel copying justmake second sequence - if (__n1 == 0) - return __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result, __copy_range); - - // testing whether the sequences are intersected - _ForwardIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); - - if (__left_bound_seq_1 == __last1) - { - //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 - __par_backend::__parallel_invoke( - __backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), - [=] { - __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result, __copy_range); - }, - [=] { - __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result + __n1, __copy_range); - }); - return __result + __n1 + __n2; - } - - // testing whether the sequences are intersected - _ForwardIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); - - if (__left_bound_seq_2 == __last2) - { - //{2} < {1}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 - __par_backend::__parallel_invoke( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - [=] { - __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result, __copy_range); - }, - [=] { - __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result + __n2, __copy_range); - }); - return __result + __n1 + __n2; - } - - const auto __m1 = __left_bound_seq_1 - __first1; - if (__m1 > __set_algo_cut_off) - { - auto __res_or = __result; - __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) - __par_backend::__parallel_invoke( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - //do parallel copying of [first1; left_bound_seq_1) - [=] { - __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __left_bound_seq_1, __res_or, __copy_range); - }, - [=, &__result] { - __result = __internal::__parallel_set_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, - __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, - __set_union_op); - }); - return __result; - } - - const auto __m2 = __left_bound_seq_2 
- __first2; - assert(__m1 == 0 || __m2 == 0); - if (__m2 > __set_algo_cut_off) - { - auto __res_or = __result; - __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) - __par_backend::__parallel_invoke( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - //do parallel copying of [first2; left_bound_seq_2) - [=] { - __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, - __left_bound_seq_2, __res_or, __copy_range); - }, - [=, &__result] { - __result = __internal::__parallel_set_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, - __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, - __set_union_op); - }); - return __result; - } - - return __internal::__parallel_set_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); -} - //a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' template @@ -5578,17 +3595,6 @@ __brick_set_union(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); -} - template _OutputIterator @@ -5602,35 +3608,6 @@ __pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Forwa typename 
_Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*__is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, - _OutputIterator>(); - - const auto __n1 = __last1 - __first1; - const auto __n2 = __last2 - __first2; - - // use serial algorithm - if (__n1 + __n2 <= __set_algo_cut_off) - return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); - - typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; - return __parallel_set_union_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, - __comp, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, - __BrickCopyConstruct<_IsVector>()); - }); -} - template _OutputIterator @@ -5679,16 +3656,6 @@ __brick_set_intersection(_RandomAccessIterator1 __first1, _RandomAccessIterator1 return ::std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return 
__internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); -} - template _OutputIterator @@ -5702,73 +3669,6 @@ __pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator3>(); - - typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; - - const auto __n1 = __last1 - __first1; - const auto __n2 = __last2 - __first2; - - // intersection is empty - if (__n1 == 0 || __n2 == 0) - return __result; - - // testing whether the sequences are intersected - _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); - //{1} < {2}: seq 2 is wholly greater than seq 1, so, the intersection is empty - if (__left_bound_seq_1 == __last1) - return __result; - - // testing whether the sequences are intersected - _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); - //{2} < {1}: seq 1 is wholly greater than seq 2, so, the intersection is empty - if (__left_bound_seq_2 == __last2) - return __result; - - const auto __m1 = __last1 - __left_bound_seq_1 + __n2; - if (__m1 > __set_algo_cut_off) - { - //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) - return 
__internal::__parallel_set_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, - __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, - __result, __comp); - }); - } - - const auto __m2 = __last2 - __left_bound_seq_2 + __n1; - if (__m2 > __set_algo_cut_off) - { - //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) - __result = __internal::__parallel_set_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, - __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, - __result, __comp); - }); - return __result; - } - - // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm - return ::std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp); -} - template _RandomAccessIterator3 @@ -5842,96 +3742,31 @@ __brick_set_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _F /*__is_vector=*/::std::false_type) noexcept { return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); -} - -template -_RandomAccessIterator3 -__brick_set_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp, - 
/*__is_vector=*/::std::true_type) noexcept -{ - _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); - return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); -} - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); -} - -template -_OutputIterator -__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp) noexcept -{ - static_assert(__is_backend_tag_v<_Tag>); - - return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, - typename _Tag::__is_vector{}); -} - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator3>(); - - const auto __n1 = __last1 - __first1; - const auto __n2 = __last2 - 
__first2; - - // {} \ {2}: the difference is empty - if (__n1 == 0) - return __result; - - // {1} \ {}: parallel copying just first sequence - if (__n2 == 0) - return __pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __result, __internal::__brick_copy<_ExecutionPolicy>{}); - - // testing whether the sequences are intersected - _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); - //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence - if (__left_bound_seq_1 == __last1) - return __pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __result, __internal::__brick_copy<_ExecutionPolicy>{}); - - // testing whether the sequences are intersected - _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); - //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence - if (__left_bound_seq_2 == __last2) - return __internal::__pattern_walk2_brick(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result, __brick_copy<_ExecutionPolicy>{}); - - if (__n1 + __n2 > __set_algo_cut_off) - return __parallel_set_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, - __comp, __BrickCopyConstruct<_IsVector>()); - }); +} - // use serial algorithm +template +_RandomAccessIterator3 +__brick_set_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + 
_RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp, + /*__is_vector=*/::std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); } +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + static_assert(__is_backend_tag_v<_Tag>); + + return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + template _RandomAccessIterator3 @@ -6006,17 +3841,6 @@ __brick_set_symmetric_difference(_RandomAccessIterator1 __first1, _RandomAccessI return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp, - __is_vector); -} - template _OutputIterator @@ -6030,36 +3854,6 @@ __pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 _ typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare 
__comp, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator3>(); - - const auto __n1 = __last1 - __first1; - const auto __n2 = __last2 - __first2; - - // use serial algorithm - if (__n1 + __n2 <= __set_algo_cut_off) - return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); - - typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; - return __internal::__parallel_set_union_op( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, - __comp, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_symmetric_difference_construct( - __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }); -} - template _RandomAccessIterator3 @@ -6108,14 +3902,6 @@ __brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __las [&__comp](_RandomAccessIterator __it, _SizeType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_is_heap_until(__first, __last, __comp, __is_vector); -} - template _RandomAccessIterator __pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -6148,25 +3934,6 @@ __is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _D 
[&__comp](_RandomAccessIterator __it, _DifferenceType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - return __parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, __is_vector); - }, - ::std::true_type{}); - }); -} - template _RandomAccessIterator __pattern_is_heap_until(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -6223,14 +3990,6 @@ __is_heap_local(_RandomAccessIterator __first, _DifferenceType __begin, _Differe }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_is_heap(__first, __last, __comp, __is_vector); -} - template bool __pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, @@ -6241,23 +4000,6 @@ __pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _Rand return __internal::__brick_is_heap(__first, __last, __comp, typename _Tag::__is_vector{}); } -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - - return __internal::__except_handler([&]() { - return !__parallel_or(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, - __is_vector); - }); - }); -} - template bool __pattern_is_heap(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -6296,14 +4038,6 @@ __brick_min_element(_RandomAccessIterator __first, _RandomAccessIterator __last, #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_min_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_min_element(__first, __last, __comp, __is_vector); -} - template _ForwardIterator __pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -6314,41 +4048,6 @@ __pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forwa return __internal::__brick_min_element(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) -{ - // 
a trivial case pre-check - if (__last - __first < 2) - return __first; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - return __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, - [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, - _RandomAccessIterator __init) -> _RandomAccessIterator { - const _RandomAccessIterator __subresult = - __internal::__brick_min_element(__begin, __end, __comp, __is_vector); - return __init == __last ? __subresult - : __internal::__cmp_iterators_by_values(__init, __subresult, __comp, - oneapi::dpl::__internal::__pstl_less()); - }, - [=](_RandomAccessIterator __it1, _RandomAccessIterator __it2) -> _RandomAccessIterator { - if (__it1 == __last) - return __it2; - if (__it2 == __last) - return __it1; - return __internal::__cmp_iterators_by_values(__it1, __it2, __comp, - oneapi::dpl::__internal::__pstl_less()); - }); - }); -} - template _RandomAccessIterator __pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -6406,15 +4105,6 @@ __brick_minmax_element(_RandomAccessIterator __first, _RandomAccessIterator __la #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator, _ForwardIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_minmax_element(__first, __last, __comp, __is_vector); -} - template ::std::pair<_ForwardIterator, _ForwardIterator> __pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -6425,52 
+4115,6 @@ __pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo return __internal::__brick_minmax_element(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) -{ - // a trivial case pre-check - if (__last - __first < 2) - return ::std::make_pair(__first, __first); - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; - - return __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - /*identity*/ ::std::make_pair(__last, __last), - [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { - const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, __is_vector); - if (__init.first == __last) // = identity - return __subresult; - return ::std::make_pair( - __internal::__cmp_iterators_by_values(__init.first, __subresult.first, __comp, - oneapi::dpl::__internal::__pstl_less()), - __internal::__cmp_iterators_by_values(__init.second, __subresult.second, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__pstl_greater())); - }, - [=, &__comp](_Result __p1, _Result __p2) -> _Result { - if (__p1.first == __last) - return __p2; - if (__p2.first == __last) - return __p1; - return ::std::make_pair( - __internal::__cmp_iterators_by_values(__p1.first, __p2.first, __comp, - 
oneapi::dpl::__internal::__pstl_less()), - __internal::__cmp_iterators_by_values(__p1.second, __p2.second, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__pstl_greater())); - }); - }); -} - template ::std::pair<_RandomAccessIterator, _RandomAccessIterator> __pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -6542,16 +4186,6 @@ __brick_mismatch(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1 return __unseq_backend::__simd_first(__first1, __n, __first2, __not_pred<_Predicate&>(__pred)); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator1, _ForwardIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, __is_vector); -} - template ::std::pair<_ForwardIterator1, _ForwardIterator2> __pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -6562,31 +4196,6 @@ __pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Forwar return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2>> -__pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Predicate __pred, - _IsVector __is_vector, /* is_parallel = */ ::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, 
_RandomAccessIterator1, _RandomAccessIterator2>(); - - return __internal::__except_handler([&]() { - auto __n = ::std::min(__last1 - __first1, __last2 - __first2); - auto __result = __internal::__parallel_find( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [__first1, __first2, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - __pred, __is_vector) - .first; - }, - ::std::true_type{}); - return ::std::make_pair(__result, __first2 + (__result - __first1)); - }); -} - template ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> @@ -6669,15 +4278,6 @@ __pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __ typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept -{ - return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, __is_vector); -} - template bool __pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator1 __first1, @@ -6687,71 +4287,10 @@ __pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, _IsVector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _Compare __comp, _IsVector __is_vector, - 
/* is_parallel = */ ::std::true_type) -{ - if (__first2 == __last2) - { // if second sequence is empty - return false; - } - else if (__first1 == __last1) - { // if first sequence is empty - return true; - } - else - { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(); - - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _RefType1; - typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _RefType2; - --__last1; - --__last2; - auto __n = ::std::min(__last1 - __first1, __last2 - __first2); - auto __result = __internal::__parallel_find( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [__first1, __first2, &__comp, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__brick_mismatch( - __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - [&__comp](const _RefType1 __x, const _RefType2 __y) { - return !__comp(__x, __y) && !__comp(__y, __x); - }, - __is_vector) - .first; - }, - ::std::true_type{}); - - if (__result == __last1 && __first2 + (__result - __first1) != __last2) - { // if first sequence shorter than second - return !__comp(*(__first2 + (__result - __first1)), *__result); - } - else - { // if second sequence shorter than first or both have the same number of elements - return __comp(*__result, *(__first2 + (__result - __first1))); - } - } -} - //------------------------------------------------------------------------ // swap //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, _IsParallel __is_parallel) -{ - return 
__pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f, __is_vector, - __is_parallel); -} - template _ForwardIterator2 __pattern_swap(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -6830,15 +4369,6 @@ __brick_shift_left(_ForwardIterator __first, _ForwardIterator __last, return __first + __size_res; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - typename ::std::iterator_traits<_ForwardIterator>::difference_type __n, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __brick_shift_left(__first, __last, __n, __is_vector); -} - template _ForwardIterator __pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -6849,53 +4379,6 @@ __pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forwar return __brick_shift_left(__first, __last, __n, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - typename ::std::iterator_traits<_ForwardIterator>::difference_type __n, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. 
- if (__n <= 0) - return __last; - auto __size = __last - __first; - if (__n >= __size) - return __first; - - using _DiffType = typename ::std::iterator_traits<_ForwardIterator>::difference_type; - - _DiffType __mid = __size / 2 + __size % 2; - _DiffType __size_res = __size - __n; - - //1. n >= size/2; there is enough memory to 'total' parallel copying - if (__n >= __mid) - { - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __size, - [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { - __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, - __first + __i - __n, __is_vector); - }); - } - else //2. n < size/2; there is not enough memory to parallel copying; doing parallel copying by n elements - { - //TODO: to consider parallel processing by the 'internal' loop (but we may probably get cache locality issues) - for (auto __k = __n; __k < __size; __k += __n) - { - auto __end = ::std::min(__k + __n, __size); - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __k, __end, - [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { - __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, - __first + __i - __n, __is_vector); - }); - } - } - - return __first + __size_res; -} - template _ForwardIterator __pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, @@ -6941,20 +4424,6 @@ __pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Forw return __first + __size_res; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, - typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n, - _IsVector __is_vector, _IsParallel is_parallel) -{ - using _ReverseIterator = typename 
::std::reverse_iterator<_BidirectionalIterator>; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), - _ReverseIterator(__last), _ReverseIterator(__first), __n, - __is_vector, is_parallel); - - return __res.base(); -} - template _BidirectionalIterator __pattern_shift_right(_Tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index 0e8ba2a96e7..46826aff9d3 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -43,26 +43,12 @@ _Tp __brick_transform_reduce(_ForwardIterator1, _ForwardIterator1, _ForwardItera _BinaryOperation2, /*__is_vector=*/::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, - _BinaryOperation1, _BinaryOperation2, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _Tp __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, _BinaryOperation1, _BinaryOperation2) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _Tp, _BinaryOperation1, _BinaryOperation2, _IsVector __is_vector, - /*is_parallel=*/::std::true_type); - template _Tp @@ -81,26 +67,12 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, - _UnaryOperation, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _Tp __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, 
_ForwardIterator1, _ForwardIterator2, _Tp, _BinaryOperation1, _BinaryOperation2 __bnary_op2) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Tp, _BinaryOperation, - _UnaryOperation, _IsVector, - /*is_parallel=*/::std::true_type); - template _Tp @@ -135,13 +107,6 @@ ::std::pair<_OutputIterator, _Tp> __brick_transform_scan(_RandomAccessIterator, _UnaryOperation, _Tp, _BinaryOperation, /*Inclusive*/ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, _Tp, - _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - template _OutputIterator @@ -177,13 +142,6 @@ __pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomA _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); // transform_scan without initial element -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - _IsVector __is_vector, _IsParallel __is_parallel); - template _OutputIterator @@ -203,23 +161,11 @@ _OutputIterator __brick_adjacent_difference(_RandomAccessIterator, _RandomAccess _BinaryOperation, /*is_vector*/ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryOperation, - _IsVector, /*is_parallel*/ ::std::false_type) noexcept; - template _OutputIterator 
__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryOperation) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _BinaryOperation, _IsVector, /*is_parallel*/ ::std::true_type); - template _RandomAccessIterator2 diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 216f313ffbc..92ca677bc8d 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -61,17 +61,6 @@ __brick_transform_reduce(_RandomAccessIterator1 __first1, _RandomAccessIterator1 [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); -} - template _Tp @@ -85,33 +74,6 @@ __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, 
_RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - return __par_backend::__parallel_transform_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { - return __binary_op2(*__i, *(__first2 + (__i - __first1))); - }, - __init, - __binary_op1, // Combine - [__first1, __first2, __binary_op1, __binary_op2, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp { - return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, - __binary_op2, __is_vector); - }); - }); -} - template _Tp @@ -165,16 +127,6 @@ __brick_transform_reduce(_RandomAccessIterator __first, _RandomAccessIterator __ [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, - _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); -} - template _Tp @@ -187,27 +139,6 @@ __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - 
oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - return __par_backend::__parallel_transform_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, - [__unary_op, __binary_op, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { - return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); - }); - }); -} - template _Tp @@ -305,18 +236,6 @@ __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __la /*is_vector=*/::std::false_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept -{ - return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), - __is_vector) - .first; -} - template _OutputIterator @@ -475,31 +394,6 @@ __pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ } // transform_scan without initial element -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - _IsVector __is_vector, _IsParallel __is_parallel) -{ - typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; - if (__first != __last) - { 
- _ValueType __tmp = __unary_op(*__first); - *__result = __tmp; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator, _OutputIterator>(); - - return __pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, - ++__result, __unary_op, __tmp, __binary_op, _Inclusive()); - } - else - { - return __result; - } -} - template _OutputIterator @@ -553,16 +447,6 @@ __brick_adjacent_difference(_RandomAccessIterator1 __first, _RandomAccessIterato [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector, - /*is_parallel*/ ::std::false_type) noexcept -{ - return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); -} - template _OutputIterator __pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, @@ -573,34 +457,6 @@ __pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_adjacent_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename 
decltype(__dispatch_tag)::__backend_tag; - - assert(__first != __last); - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; - typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; - - *__d_first = *__first; - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, - [&__op, __is_vector, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { - _RandomAccessIterator2 __d_b = __d_first + (__b - __first); - __internal::__brick_walk3( - __b, __e, __b + 1, __d_b + 1, - [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, - __is_vector); - }); - return __d_first + (__last - __first); -} - template _RandomAccessIterator2 From ba4d8c75fc7b48210e54ea476d923bbd25a5d394 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:06:28 +0100 Subject: [PATCH 297/566] Remove old implementations with __enable_if_host_execution_policy_conditional --- include/oneapi/dpl/pstl/algorithm_fwd.h | 62 ----------- include/oneapi/dpl/pstl/algorithm_impl.h | 136 ----------------------- include/oneapi/dpl/pstl/numeric_fwd.h | 16 --- include/oneapi/dpl/pstl/numeric_impl.h | 74 ------------ 4 files changed, 288 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index bcf5fa3b829..5a1dcba419c 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -72,24 +72,10 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type); - template void __pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&&, 
_RandomAccessIterator, _RandomAccessIterator, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type); - template void __pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function); @@ -157,26 +143,12 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); - template _RandomAccessIterator2 __pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); - template _ForwardIterator2 __pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, @@ -198,26 +170,12 @@ _ForwardIterator2 __pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Brick) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, 
__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); - template _RandomAccessIterator2 __pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Brick); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); - template _ForwardIterator2 __pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, @@ -255,32 +213,12 @@ _ForwardIterator3 __pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type); - template _RandomAccessIterator3 __pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - 
!__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type); - template _ForwardIterator3 diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 11c5d758454..c5d96eacaad 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -326,28 +326,6 @@ __pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIt return __internal::__brick_walk2(__first1, __last1, __first2, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__f, __first1, __first2, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, __is_vector); - }); - return __first2 + (__last1 - __first1); - }); -} - template _RandomAccessIterator2 @@ -366,33 +344,6 @@ __pattern_walk2(__parallel_tag<_IsVector>, 
_ExecutionPolicy&& __exec, _RandomAcc }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector, /*parallel=*/::std::true_type) -{ - return __internal::__except_handler([&]() { - using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; - auto __begin = _iterator_tuple(__first1, __first2); - auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2()); - - typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; - typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, - [&__f](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { - __f(::std::get<0>(__val), ::std::get<1>(__val)); - }); - - //TODO: parallel_for_each does not allow to return correct iterator value according to the ::std::transform - // implementation. Therefore, iterator value is calculated separately. 
- for (; __begin != __end; ++__begin) - ; - return ::std::get<1>(__begin.base()); - }); -} - template _ForwardIterator2 __pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, @@ -470,35 +421,6 @@ __pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran }); } -//TODO: it postponed till adding more or less effective parallel implementation -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) -{ - using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; - auto __begin = _iterator_tuple(__first1, __first2); - auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2()); - - typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; - typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - - return __except_handler([&]() { - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, - [__brick](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { - __brick(::std::get<0>(__val), - ::std::forward<_ReferenceType2>(::std::get<1>(__val))); - }); - - //TODO: parallel_for_each does not allow to return correct iterator value according to the ::std::transform - // implementation. Therefore, iterator value is calculated separately. 
- for (; __begin != __end; ++__begin) - ; - return ::std::get<1>(__begin.base()); - }); -} - //TODO: it postponed till adding more or less effective parallel implementation template _ForwardIterator2 @@ -592,32 +514,6 @@ __pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIt return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator3>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__f, __first1, __first2, __first3, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, - __is_vector); - }); - return __first3 + (__last1 - __first1); - }); -} - template _RandomAccessIterator3 @@ -638,38 +534,6 @@ __pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcc }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2, _ForwardIterator3>, - _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, 
_ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f, _IsVector, - /*parallel=*/::std::true_type) -{ - return __internal::__except_handler([&]() { - using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2, _ForwardIterator3>; - auto __begin = _iterator_tuple(__first1, __first2, __first3); - auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2(), - /*dummy parameter*/ _ForwardIterator3()); - - typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; - typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - typedef typename ::std::iterator_traits<_ForwardIterator3>::reference _ReferenceType3; - - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, - [&](::std::tuple<_ReferenceType1, _ReferenceType2, _ReferenceType3> __val) { - __f(::std::get<0>(__val), ::std::get<1>(__val), ::std::get<2>(__val)); - }); - - //TODO: parallel_for_each does not allow to return correct iterator value according to the ::std::transform - // implementation. Therefore, iterator value is calculated separately. 
- for (; __begin != __end; ++__begin) - ; - return ::std::get<2>(__begin.base()); - }); -} - template _ForwardIterator3 diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index 46826aff9d3..9ea0ebb1de4 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -113,28 +113,12 @@ _OutputIterator __pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::true_type); - template ::std::enable_if_t, _OutputIterator> __pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - ::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::true_type); - template ::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 92ca677bc8d..b875883a598 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -250,38 +250,6 @@ __pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo .first; } -template 
-oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; - - return __internal::__except_handler([&]() { - __par_backend::__parallel_transform_scan( - oneapi::dpl::__internal::__serial_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __last - __first, - [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, - __binary_op, - [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { - // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. 
- return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, - __unary_op, - /*__is_vector*/ ::std::false_type()); - }, - [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j, - _Tp __init) { - return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, - __init, __binary_op, _Inclusive(), __is_vector) - .second; - }); - return __result + (__last - __first); - }); -} - template ::std::enable_if_t, _OutputIterator> @@ -312,48 +280,6 @@ __pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __e }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - ::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::true_type) -{ - typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; - _DifferenceType __n = __last - __first; - - if (__n <= 0) - { - return __result; - } - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator, _OutputIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __internal::__except_handler([&]() { - __par_backend::__parallel_strict_scan( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, - [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { - return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, - __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector) - .second; - }, - __binary_op, - [__result, &__binary_op](_DifferenceType __i, 
_DifferenceType __len, _Tp __initial) { - return *(::std::transform(__result + __i, __result + __i + __len, __result + __i, - [&__initial, &__binary_op](const _Tp& __x) { - _ONEDPL_PRAGMA_FORCEINLINE - return __binary_op(__initial, __x); - }) - - 1); - }, - [](_Tp) {}); - return __result + (__last - __first); - }); -} - template ::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> From 9aeec7bbfb06ed327c0a3fca6e8c01aa97227f55 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 10:12:57 +0100 Subject: [PATCH 298/566] include/oneapi/dpl/pstl/execution_defs.h - remove __enable_if_host_execution_policy_conditional as not required anymore --- include/oneapi/dpl/pstl/execution_defs.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/execution_defs.h b/include/oneapi/dpl/pstl/execution_defs.h index 5001ec9fd0f..6d1d0efd569 100644 --- a/include/oneapi/dpl/pstl/execution_defs.h +++ b/include/oneapi/dpl/pstl/execution_defs.h @@ -184,10 +184,6 @@ template using __enable_if_host_execution_policy = ::std::enable_if_t<__is_host_execution_policy<::std::decay_t<_ExecPolicy>>::value, _T>; -template -using __enable_if_host_execution_policy_conditional = - ::std::enable_if_t<__is_host_execution_policy<::std::decay_t<_ExecPolicy>>::value && __condition, _T>; - template struct __ref_or_copy_impl { From b8eb46bb9e743cd5cb91cf4251edc64b31334fd6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 18:45:41 +0100 Subject: [PATCH 299/566] Remove old implementations with __enable_if_hetero_execution_policy --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 1776 +---------------- .../dpl/pstl/hetero/numeric_impl_hetero.h | 206 -- 2 files changed, 38 insertions(+), 1944 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 2b7108c7ba3..c451be81c52 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ 
b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -39,28 +39,6 @@ namespace __internal // walk1 //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - auto __n = __last - __first; - if (__n <= 0) - return; - - auto __keep = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); - auto __buf = __keep(__first, __last); - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf.all_view()) - .wait(); -} - template void __pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, @@ -83,17 +61,6 @@ __pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIt // walk1_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - - __pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); - return __first + __n; -} - template _ForwardIterator @@ -111,38 +78,6 @@ 
__pattern_walk1_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _F // TODO: A tag _IsSync is used for provide a patterns call pipeline, where the last one should be synchronous // Probably it should be re-designed by a pipeline approach, when a pattern returns some sync obejects // and ones are combined into a "pipeline" (probably like Range pipeline) -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - auto __n = __last1 - __first1; - if (__n <= 0) - return __first2; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode1, _ForwardIterator1>(); - auto __buf1 = __keep1(__first1, __last1); - - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); - auto __buf2 = __keep2(__first2, __first2 + __n); - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); - - if constexpr (_IsSync()) - __future_obj.wait(); - - return __first2 + __n; -} - template , _ExecutionPolicy&& __exec, _ForwardIt return __first2 + __n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Function __f, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - return 
__pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f, - ::std::true_type(), ::std::true_type()); -} - template _ForwardIterator2 @@ -194,22 +119,6 @@ __pattern_walk2_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _F // swap //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, /*is_vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __pattern_walk2<__backend_tag, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read_write, - __par_backend_hetero::access_mode::read_write>( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); -} - template _ForwardIterator2 @@ -225,39 +134,6 @@ __pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Forw // walk3 //------------------------------------------------------------------------ -template <__par_backend_hetero::access_mode __acc_mode1 = __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode __acc_mode2 = __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode __acc_mode3 = __par_backend_hetero::access_mode::write, - typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, typename _ForwardIterator3, - typename _Function> -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - 
_ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - auto __n = __last1 - __first1; - if (__n <= 0) - return __first3; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode1, _ForwardIterator1>(); - auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); - auto __buf2 = __keep2(__first2, __first2 + __n); - auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode3, _ForwardIterator3>(); - auto __buf3 = __keep3(__first3, __first3 + __n); - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, - _ForwardIterator2, _ForwardIterator3>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view(), __buf3.all_view()) - .wait(); - - return __first3 + __n; -} - template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - /*parallel=*/::std::true_type) -{ - if (__last - __first <= 0) - return; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - - __pattern_walk1( - __dispatch_tag, - __par_backend_hetero::make_wrapped_policy<__walk_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __last, __f); -} - template void __pattern_walk_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, @@ -330,20 +190,6 @@ struct __walk_brick_n_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> 
-__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - - __pattern_walk1( - __dispatch_tag, - __par_backend_hetero::make_wrapped_policy<__walk_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __first + __n, __f); - return __first + __n; -} - template _ForwardIterator @@ -366,17 +212,6 @@ struct __walk2_brick_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel*/ ::std::true_type) -{ - return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __brick, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); -} - template _ForwardIterator2 @@ -394,19 +229,6 @@ struct __walk2_brick_n_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Brick __brick, /*parallel*/ ::std::true_type) -{ - - return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__walk2_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first1, __first1 + __n, __first2, __brick, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); -} - template _ForwardIterator2 @@ -428,27 +250,6 @@ struct __walk2_transform_if_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, 
_ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, - /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming - // values of the output sequence for elements where the predicate is false. - return __pattern_walk2<__backend_tag, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode::read_write>( - __par_backend_hetero::make_wrapped_policy<__walk2_transform_if_wrapper>( - __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __func); -} - template _ForwardIterator2 @@ -470,24 +271,6 @@ struct __walk3_transform_if_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming - // values of the output sequence for elements where the predicate is false. 
- return __pattern_walk3<__par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode::read_write>( - __par_backend_hetero::make_wrapped_policy<__walk3_transform_if_wrapper>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __first3, __func, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); -} - template _ForwardIterator3 @@ -521,20 +304,6 @@ struct fill_functor } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - - __pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - fill_functor<_T>{__value}); - return __last; -} - template _ForwardIterator __pattern_fill(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, @@ -577,20 +346,6 @@ struct generate_functor } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - - __pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), - 
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - generate_functor<_Generator>{__g}); - return __last; -} - template _ForwardIterator __pattern_generate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, @@ -683,64 +438,6 @@ struct __brick_fill_n<_SourceT, _ExecutionPolicy, // min_element, max_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - if (__first == __last) - return __last; - - using _IteratorValueType = typename ::std::iterator_traits<_Iterator>::value_type; - using _IndexValueType = ::std::make_unsigned_t::difference_type>; - using _ReduceValueType = tuple<_IndexValueType, _IteratorValueType>; - // Commutativity of the reduction operator depends on the compilation target (see __reduce_fn below); - // __spirv_target_conditional postpones deciding on commutativity to the device code where the - // target can be correctly tested. - using _Commutative = oneapi::dpl::__internal::__spirv_target_conditional; - auto __reduce_fn = [__comp](_ReduceValueType __a, _ReduceValueType __b) { - using ::std::get; - // TODO: Consider removing the non-commutative operator for SPIR-V targets when we see improved performance with the - // non-sequential load path in transform_reduce. - if constexpr (oneapi::dpl::__internal::__is_spirv_target_v) - { - // This operator doesn't track the lowest found index in case of equal min. or max. values. Thus, this operator is - // not commutative. - if (__comp(get<1>(__b), get<1>(__a))) - { - return __b; - } - return __a; - } - else - { - // This operator keeps track of the lowest found index in case of equal min. or max. values. 
Thus, this operator is - // commutative. - bool _is_a_lt_b = __comp(get<1>(__a), get<1>(__b)); - bool _is_b_lt_a = __comp(get<1>(__b), get<1>(__a)); - - if (_is_b_lt_a || (!_is_a_lt_b && get<0>(__b) < get<0>(__a))) - { - return __b; - } - return __a; - } - }; - auto __transform_fn = [](auto __gidx, auto __acc) { return _ReduceValueType{__gidx, __acc[__gidx]}; }; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf = __keep(__first, __last); - - auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, - unseq_backend::__no_init_value{}, // no initial value - __buf.all_view()) - .get(); - - return __first + ::std::get<0>(__ret_idx); -} - template _Iterator __pattern_min_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -817,56 +514,6 @@ __pattern_min_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec // However the solution requires use of custom pattern or substantial redesign of existing parallel_transform_reduce. // -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator, _Iterator>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - if (__first == __last) - return ::std::make_pair(__first, __first); - - using _IteratorValueType = typename ::std::iterator_traits<_Iterator>::value_type; - using _IndexValueType = ::std::make_unsigned_t::difference_type>; - using _ReduceValueType = ::std::tuple<_IndexValueType, _IndexValueType, _IteratorValueType, _IteratorValueType>; - - // This operator doesn't track the lowest found index in case of equal min. values and the highest found index in - // case of equal max. values. 
Thus, this operator is not commutative. - auto __reduce_fn = [__comp](_ReduceValueType __a, _ReduceValueType __b) { - using ::std::get; - auto __chosen_for_min = __a; - auto __chosen_for_max = __b; - - assert(get<0>(__a) < get<0>(__b)); - assert(get<1>(__a) < get<1>(__b)); - - if (__comp(get<2>(__b), get<2>(__a))) - __chosen_for_min = ::std::move(__b); - if (__comp(get<3>(__b), get<3>(__a))) - __chosen_for_max = ::std::move(__a); - return _ReduceValueType{get<0>(__chosen_for_min), get<1>(__chosen_for_max), get<2>(__chosen_for_min), - get<3>(__chosen_for_max)}; - }; - - // TODO: Doesn't work with `zip_iterator`. - // In that case the first and the second arguments of `_ReduceValueType` will be - // a `tuple` of `difference_type`, not the `difference_type` itself. - auto __transform_fn = [](auto __gidx, auto __acc) { - return _ReduceValueType{__gidx, __gidx, __acc[__gidx], __acc[__gidx]}; - }; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf = __keep(__first, __last); - - auto __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, - ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, - unseq_backend::__no_init_value{}, // no initial value - __buf.all_view()) - .get(); - - return ::std::make_pair<_Iterator, _Iterator>(__first + ::std::get<0>(__ret), __first + ::std::get<1>(__ret)); -} - template ::std::pair<_Iterator, _Iterator> __pattern_minmax_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, @@ -921,15 +568,11 @@ __pattern_minmax_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e // adjacent_find //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator 
__last, _BinaryPredicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type, - oneapi::dpl::__internal::__or_semantic) +template +_Iterator +__pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __predicate, oneapi::dpl::__internal::__or_semantic) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - if (__last - __first < 2) return __last; @@ -944,7 +587,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() bool result = __par_backend_hetero::__parallel_find_or( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); @@ -953,66 +596,6 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator return result ? 
__first : __last; } -template -_Iterator -__pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - _BinaryPredicate __predicate, oneapi::dpl::__internal::__or_semantic) -{ - if (__last - __first < 2) - return __last; - - using _Predicate = - oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf1 = __keep1(__first, __last - 1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf2 = __keep2(__first + 1, __last); - - // TODO: in case of confilicting names - // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() - bool result = __par_backend_hetero::__parallel_find_or( - _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), - _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, __par_backend_hetero::__parallel_or_tag{}, - oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); - - // inverted conditional because of - // reorder_predicate in glue_algorithm_impl.h - return result ? 
__first : __last; -} - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type, - oneapi::dpl::__internal::__first_semantic) -{ - if (__last - __first < 2) - return __last; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - using _Predicate = - oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; - - auto __result = __par_backend_hetero::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::zip( - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)), - __par_backend_hetero::zip( - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last - 1), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last)), - _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, ::std::true_type{}); - - auto __zip_at_first = __par_backend_hetero::zip( - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)); - _Iterator __result_iterator = __first + (__result - __zip_at_first); - return (__result_iterator == __last - 1) ? 
__last : __result_iterator; -} - template _Iterator __pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -1045,35 +628,6 @@ __pattern_adjacent_find(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex // count, count_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_Iterator>::difference_type> -__pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type) -{ - if (__first == __last) - return 0; - - using _ReduceValueType = typename ::std::iterator_traits<_Iterator>::difference_type; - - auto __reduce_fn = ::std::plus<_ReduceValueType>{}; - // int is being implicitly casted to difference_type - // otherwise we can only pass the difference_type as a functor template parameter - auto __transform_fn = [__predicate](auto __gidx, auto __acc) -> int { - return (__predicate(__acc[__gidx]) ? 
1 : 0); - }; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf = __keep(__first, __last); - - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, - ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, - unseq_backend::__no_init_value{}, // no initial value - __buf.all_view()) - .get(); -} - template typename ::std::iterator_traits<_Iterator>::difference_type __pattern_count(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -1106,29 +660,6 @@ __pattern_count(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite // any_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - if (__first == __last) - return false; - - using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf = __keep(__first, __last); - - return oneapi::dpl::__par_backend_hetero::__parallel_find_or( - __backend_tag{}, - __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>( - ::std::forward<_ExecutionPolicy>(__exec)), - _Predicate{__pred}, __par_backend_hetero::__parallel_or_tag{}, __buf.all_view()); -} - template bool __pattern_any_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ 
-1153,34 +684,6 @@ __pattern_any_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It // equal //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - if (__last1 == __first1 || __last2 == __first2 || __last1 - __first1 != __last2 - __first2) - return false; - - using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, equal_predicate<_Pred>>; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); - auto __buf2 = __keep2(__first2, __last2); - - // TODO: in case of confilicting names - // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() - return !__par_backend_hetero::__parallel_find_or( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, - __par_backend_hetero::__parallel_or_tag{}, - oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); -} - template bool __pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, @@ -1208,19 +711,6 @@ __pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite // equal version for sequences with equal length 
//------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - // TODO is it correct that we check _Iterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - - return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __first2, __first2 + (__last1 - __first1), __pred); -} - template bool __pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, @@ -1255,39 +745,6 @@ __pattern_find_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _I // find_end //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - if (__first == __last || __s_last == __s_first || __last - __first < __s_last - __s_first) - return __last; - - // TODO is it correct that we check _Iterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - if (__last - __first == __s_last - __s_first) - { - const bool __res = __pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __s_first, __pred); - return __res ? 
__first : __last; - } - else - { - using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; - - return __par_backend_hetero::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_last), _Predicate{__pred}, - ::std::false_type{}); - } -} - template _Iterator1 __pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, @@ -1320,31 +777,6 @@ __pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ // find_first_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - if (__first == __last || __s_last == __s_first) - return __last; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - using _Predicate = unseq_backend::first_match_pred<_ExecutionPolicy, _Pred>; - - // TODO: To check whether it makes sense to iterate over the second sequence in case of - // distance(__first, __last) < distance(__s_first, __s_last). 
- return __par_backend_hetero::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_last), _Predicate{__pred}, - ::std::true_type{}); -} - template _Iterator1 __pattern_find_first_of(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, @@ -1375,41 +807,6 @@ class equal_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - if (__s_last == __s_first) - return __first; - - if (__last - __first < __s_last - __s_first) - return __last; - - // TODO is it correct that we check _Iterator2 in __select_backend ? - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - if (__last - __first == __s_last - __s_first) - { - const bool __res = __pattern_equal( - __dispatch_tag, - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __first, - __last, __s_first, __pred); - return __res ? 
__first : __last; - } - - using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; - return __par_backend_hetero::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_last), _Predicate{__pred}, - ::std::true_type{}); -} - template _Iterator1 __pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, @@ -1457,39 +854,6 @@ struct __search_n_unary_predicate } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Size __count, const _Tp& __value, - _BinaryPredicate __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - if (__count <= 0) - return __first; - - if (__last - __first < __count) - return __last; - - if (__last - __first == __count) - { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - - return (!__internal::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred})) - ? 
__first - : __last; - } - - using _Predicate = unseq_backend::n_elem_match_pred<_ExecutionPolicy, _BinaryPredicate, _Tp, _Size>; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - return __par_backend_hetero::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - _Predicate{__pred, __value, __count}, ::std::true_type{}); -} - template _Iterator @@ -1522,31 +886,6 @@ __pattern_search_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ // mismatch //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator1, _Iterator2>> -__pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - auto __n = ::std::min(__last1 - __first1, __last2 - __first2); - if (__n <= 0) - return ::std::make_pair(__first1, __first2); - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, equal_predicate<_Pred>>; - - auto __first_zip = __par_backend_hetero::zip( - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2)); - auto __result = __par_backend_hetero::__parallel_find( - __backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, - _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); - __n = __result - __first_zip; - return ::std::make_pair(__first1 + __n, __first2 + __n); -} - template ::std::pair<_Iterator1, _Iterator2> __pattern_mismatch(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, @@ -1572,17 +911,12 @@ __pattern_mismatch(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ // copy_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, ::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type>> -__pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _IteratorOrTuple __output_first, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +template +::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type> +__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _IteratorOrTuple>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; if (__first == __last) @@ -1596,7 +930,7 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __ oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); auto __buf2 = __keep2(__output_first, __output_first + __n); - auto __res = __par_backend_hetero::__parallel_scan_copy(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + auto __res = 
__par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __create_mask_op, __copy_by_mask_op); @@ -1604,78 +938,23 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __ return ::std::make_pair(__output_first + __n, __num_copied); } -template -::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type> -__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, - _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +template +_Iterator2 +__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __result_first, _Predicate __pred) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; if (__first == __last) - return ::std::make_pair(__output_first, _It1DifferenceType{0}); + return __result_first; _It1DifferenceType __n = __last - __first; auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); - auto __buf2 = __keep2(__output_first, __output_first + __n); - - auto __res = __par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __n, __create_mask_op, - __copy_by_mask_op); - - ::std::size_t __num_copied = __res.get(); - return ::std::make_pair(__output_first + __n, __num_copied); -} - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result_first, - _Predicate __pred, 
/*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - - if (__first == __last) - return __result_first; - - _It1DifferenceType __n = __last - __first; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); - auto __buf2 = __keep2(__result_first, __result_first + __n); - - auto __res = __par_backend_hetero::__parallel_copy_if(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __n, __pred); - - ::std::size_t __num_copied = __res.get(); - return __result_first + __num_copied; -} - -template -_Iterator2 -__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, - _Iterator2 __result_first, _Predicate __pred) -{ - using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - - if (__first == __last) - return __result_first; - - _It1DifferenceType __n = __last - __first; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); - auto __buf2 = __keep2(__result_first, __result_first + __n); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __buf2 = __keep2(__result_first, __result_first + __n); auto __res = 
__par_backend_hetero::__parallel_copy_if(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __pred); @@ -1688,32 +967,6 @@ __pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _I // partition_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator2, _Iterator3>> -__pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result1, - _Iterator3 __result2, _UnaryPredicate __pred, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) -{ - if (__first == __last) - return ::std::make_pair(__result1, __result2); - - using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - using _ReduceOp = ::std::plus<_It1DifferenceType>; - - unseq_backend::__create_mask<_UnaryPredicate, _It1DifferenceType> __create_mask_op{__pred}; - unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}}; - - auto __result = __pattern_scan_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __par_backend_hetero::zip( - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)), - __create_mask_op, __copy_by_mask_op); - - return ::std::make_pair(__result1 + __result.second, __result2 + (__last - __first - __result.second)); -} - template ::std::pair<_Iterator2, _Iterator3> @@ -1743,24 +996,6 @@ __pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e // unique_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _Iterator1 
__first, _Iterator1 __last, _Iterator2 __result_first, - _BinaryPredicate __pred, /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign, - /*inclusive*/ ::std::true_type, 1> - __copy_by_mask_op; - __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ - __not_pred<_BinaryPredicate>{__pred}}; - - auto __result = __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first, - __create_mask_op, __copy_by_mask_op); - - return __result_first + __result.second; -} - template _Iterator2 @@ -1789,29 +1024,6 @@ class copy_back_wrapper2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_remove_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - if (__last == __first) - return __last; - - using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); - auto __copy_first = __buf.get(); - auto __copy_last = - __pattern_copy_if(__dispatch_tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); - - //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer - return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); -} - template _Iterator __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -1843,36 +1055,6 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - if (__last - __first < 2) - return __last; - - using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; - - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); - auto __copy_first = __buf.get(); - auto __copy_last = - __pattern_unique_copy(oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), - decltype(__last), decltype(__copy_first)>(), - __exec, __first, __last, __copy_first, __pred); - - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), - decltype(__first)>(); - using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; - - //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer - return __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read_write, - __par_backend_hetero::access_mode::read_write>( - __dispatch_tag1, - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}); -} - template _Iterator __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -1915,38 +1097,6 @@ enum _IsPartitionedReduceType : signed char __true_false }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type) -{ - if (__last - __first < 2) - return true; - - using _ReduceValueType = _IsPartitionedReduceType; - auto __reduce_fn = [](_ReduceValueType __a, _ReduceValueType __b) { - _ReduceValueType __table[] = {__broken, __broken, __broken, __broken, __broken, __all_true, - __true_false, __true_false, __broken, __broken, __all_false, __broken, - __broken, __broken, __true_false, __broken}; - return __table[__a * 4 + __b]; - }; - auto __transform_fn = [__predicate](auto __gidx, auto __acc) { - return (__predicate(__acc[__gidx]) ? 
__all_true : __all_false); - }; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf = __keep(__first, __last); - - auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, - ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, - unseq_backend::__no_init_value{}, // no initial value - __buf.all_view()) - .get(); - - return __broken != __reduce_fn(_ReduceValueType{__all_true}, __res); -} - template bool __pattern_is_partitioned(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, @@ -1998,28 +1148,6 @@ struct __is_heap_check } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, /* vector */ ::std::true_type, /* parallel = */ ::std::true_type) -{ - if (__last - __first < 2) - return __last; - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - using _Predicate = - oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; - - return __par_backend_hetero::__parallel_find( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}, - ::std::true_type{}); -} - template _RandomAccessIterator __pattern_is_heap_until(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -2038,27 +1166,6 @@ __pattern_is_heap_until(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex 
::std::true_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, /* vector */ ::std::true_type, /* parallel = */ ::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - if (__last - __first < 2) - return true; - - using _Predicate = - oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; - - return !__par_backend_hetero::__parallel_or( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); -} - template bool __pattern_is_heap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, @@ -2079,51 +1186,6 @@ __pattern_is_heap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R //------------------------------------------------------------------------ // merge //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator3> -__pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Iterator3 __d_first, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - auto __n1 = __last1 - __first1; - auto __n2 = __last2 - __first2; - auto __n = __n1 + __n2; - if (__n == 0) - return __d_first; - - const auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2, _Iterator3>(); - using __backend_tag 
= typename decltype(__dispatch_tag)::__backend_tag; - - //To consider the direct copying pattern call in case just one of sequences is empty. - if (__n1 == 0) - oneapi::dpl::__internal::__pattern_walk2_brick( - __dispatch_tag, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy( - ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); - else if (__n2 == 0) - oneapi::dpl::__internal::__pattern_walk2_brick( - __dispatch_tag, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy( - ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); - else - { - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); - auto __buf2 = __keep2(__first2, __last2); - - auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); - auto __buf3 = __keep3(__d_first, __d_first + __n); - - __par_backend_hetero::__parallel_merge(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __buf3.all_view(), __comp) - .wait(); - } - return __d_first + __n; -} template @@ -2170,35 +1232,6 @@ __pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite //------------------------------------------------------------------------ // inplace_merge //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __middle, _Iterator __last, - _Compare __comp, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - using _ValueType = 
typename ::std::iterator_traits<_Iterator>::value_type; - - if (__first == __middle || __middle == __last || __first == __last) - return; - - assert(__first < __middle && __middle < __last); - - auto __n = __last - __first; - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __n); - auto __copy_first = __buf.get(); - auto __copy_last = __copy_first + __n; - - __pattern_merge(__exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), - __comp, ::std::true_type{}, ::std::true_type{}); - - //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer - __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); -} template void @@ -2249,15 +1282,6 @@ __stable_sort_with_projection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& .wait(); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) -{ - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::identity{}); -} - template void __pattern_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -2270,14 +1294,6 @@ __pattern_sort(__hetero_tag<_BackendTag> 
__tag, _ExecutionPolicy&& __exec, _Iter //------------------------------------------------------------------------ // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::identity{}); -} template void @@ -2288,22 +1304,6 @@ __pattern_stable_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec oneapi::dpl::identity{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _Iterator1 __keys_first, _Iterator1 __keys_last, - _Iterator2 __values_first, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - static_assert(::std::is_move_constructible_v::value_type> - && ::std::is_move_constructible_v::value_type>, - "The keys and values should be move constructible in case of parallel execution."); - - auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); - auto __end = __beg + (__keys_last - __keys_first); - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, - [](const auto& __a) { return ::std::get<0>(__a); }); -} - template void __pattern_sort_by_key(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __keys_first, @@ -2319,57 +1319,6 @@ __pattern_sort_by_key(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec [](const auto& __a) { return ::std::get<0>(__a); }); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, 
_UnaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - if (__last == __first) - return __last; - else if (__last - __first < 2) - { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - - return __pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred) - ? __last - : __first; - } - - using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; - - auto __n = __last - __first; - - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __true_buf(__exec, __n); - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __false_buf(__exec, __n); - auto __true_result = __true_buf.get(); - auto __false_result = __false_buf.get(); - - constexpr auto __dispatch_tag = __select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), - decltype(__true_result), decltype(__false_result)>(); - auto copy_result = - __pattern_partition_copy(__dispatch_tag, __exec, __first, __last, __true_result, __false_result, __pred); - auto true_count = copy_result.first - __true_result; - - //TODO: optimize copy back if possible (inplace, decrease number of submits) - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__true_result), - decltype(copy_result.first), decltype(__first)>(); - using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; - __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( - __dispatch_tag1, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, - copy_result.first, __first, __brick_move<_ExecutionPolicy>{}); - - constexpr auto __dispatch_tag2 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__false_result), - decltype(copy_result.second), decltype(__first + true_count)>(); - __pattern_walk2( - __dispatch_tag2, - 
__par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __false_result, copy_result.second, __first + true_count, __brick_move<_ExecutionPolicy>{}); - - return __first + true_count; -} - template _Iterator __pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, @@ -2416,18 +1365,6 @@ __pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ return __first + true_count; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - - //TODO: consider nonstable approaches - return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __pred); -} - template _Iterator __pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -2444,55 +1381,6 @@ __pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, // lexicographical_compare //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, - _Iterator2 __first2, _Iterator2 __last2, _Compare __comp, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) -{ - //trivial pre-checks - if (__first2 == __last2) - return false; - if (__first1 == __last1) - return true; - - using _Iterator1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - using _ReduceValueType = int32_t; - - auto __reduce_fn = [](_ReduceValueType __a, 
_ReduceValueType __b) { - bool __is_mismatched = __a != 0; - return __a * __is_mismatched + __b * !__is_mismatched; - }; - auto __transform_fn = [__comp](auto __gidx, auto __acc1, auto __acc2) { - auto const& __s1_val = __acc1[__gidx]; - auto const& __s2_val = __acc2[__gidx]; - - ::std::int32_t __is_s1_val_less = __comp(__s1_val, __s2_val); - ::std::int32_t __is_s1_val_greater = __comp(__s2_val, __s1_val); - - // 1 if __s1_val < __s2_val, -1 if __s1_val < __s2_val, 0 if __s1_val == __s2_val - return _ReduceValueType{1 * __is_s1_val_less - 1 * __is_s1_val_greater}; - }; - - auto __shared_size = ::std::min(__last1 - __first1, (_Iterator1DifferenceType)(__last2 - __first2)); - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf1 = __keep1(__first1, __first1 + __shared_size); - - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); - auto __buf2 = __keep2(__first2, __first2 + __shared_size); - - auto __ret_idx = - oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, - ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, - unseq_backend::__no_init_value{}, // no initial value - __buf1.all_view(), __buf2.all_view()) - .get(); - - return __ret_idx ? __ret_idx == 1 : (__last1 - __first1) < (__last2 - __first2); -} - template bool __pattern_lexicographical_compare(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, @@ -2541,38 +1429,6 @@ __pattern_lexicographical_compare(__hetero_tag<_BackendTag> __tag, _ExecutionPol return __ret_idx ? 
__ret_idx == 1 : (__last1 - __first1) < (__last2 - __first2); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - //according to the spec - if (__first2 == __last2) - return true; - - //optimization; {1} - the first sequence, {2} - the second sequence - //{1} is empty or size_of{2} > size_of{1} - if (__first1 == __last1 || __last2 - __first2 > __last1 - __first1) - return false; - - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _Size1; - typedef typename ::std::iterator_traits<_ForwardIterator2>::difference_type _Size2; - - using __brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; - return !__par_backend_hetero::__parallel_or( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last1), - __brick_include_type(__comp, __last1 - __first1, __last2 - __first2)); -} - template bool @@ -2604,24 +1460,6 @@ __pattern_includes(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ 
-template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - if (__last - __first < 2) - return; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - __par_backend_hetero::__parallel_partial_sort( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__last), __comp) - .wait(); -} template void @@ -2668,91 +1506,6 @@ struct __partial_sort_2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutIterator> -__pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InIterator __last, - _OutIterator __out_first, _OutIterator __out_last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - using _ValueType = typename ::std::iterator_traits<_InIterator>::value_type; - - auto __in_size = __last - __first; - auto __out_size = __out_last - __out_first; - - if (__in_size == 0 || __out_size == 0) - return __out_first; - - // TODO: we can avoid a separate __pattern_walk2 for initial copy: it can be done during sort itself - // like it's done for CPU version, but it's better to be done together with merge cutoff implementation - // as it uses a similar mechanism. 
- if (__in_size <= __out_size) - { - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), - decltype(__out_first)>(); - using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; - - // If our output buffer is larger than the input buffer, simply copy elements to the output and use - // full sort on them. - auto __out_end = __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( - __dispatch_tag1, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, - __out_first, __brick_copy<_ExecutionPolicy>{}); - - constexpr auto __dispatch_tag2 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__out_first), decltype(__out_end), - decltype(__out_first)>(); - - // Use reqular sort as partial_sort isn't required to be stable - __pattern_sort( - __dispatch_tag2, - __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), - __out_first, __out_end, __comp, ::std::true_type{}); - - return __out_end; - } - else - { - // If our input buffer is smaller than the input buffer do the following: - // - create a temporary buffer and copy all the elements from the input buffer there - // - run partial sort on the temporary buffer - // - copy k elements from the temporary buffer to the output buffer. 
- oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __in_size); - - auto __buf_first = __buf.get(); - - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), - decltype(__buf_first)>(); - using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; - - auto __buf_last = __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( - __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, - __brick_copy<_ExecutionPolicy>{}); - - auto __buf_mid = __buf_first + __out_size; - - constexpr auto __dispatch_tag11 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), - decltype(__buf_last)>(); - using __backend_tag11 = typename decltype(__dispatch_tag11)::__backend_tag; - - __par_backend_hetero::__parallel_partial_sort( - __backend_tag11{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); - - constexpr auto __dispatch_tag2 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), - decltype(__out_first)>(); - - return __pattern_walk2( - __dispatch_tag2, - __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), - __buf_first, __buf_mid, __out_first, __brick_copy<_ExecutionPolicy>{}); - } -} - template _OutIterator @@ -2833,23 +1586,6 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& // nth_element //------------------------------------------------------------------------ -template 
-oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __nth, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) -{ - if (__first == __last || __nth == __last) - return; - - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - - // TODO: check partition-based implementation - // - try to avoid host dereference issue - // - measure performance of the issue-free implementation - __pattern_partial_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, - __comp); -} - template void __pattern_nth_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __nth, @@ -2867,26 +1603,6 @@ __pattern_nth_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec //------------------------------------------------------------------------ // reverse //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - auto __n = __last - __first; - if (__n <= 0) - return; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); - auto __buf = __keep(__first, __last); - oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::__reverse_functor::difference_type>{__n}, __n / 2, - __buf.all_view()) - .wait(); -} template void @@ -2910,34 +1626,6 @@ 
__pattern_reverse(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _I //------------------------------------------------------------------------ // reverse_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, - _ForwardIterator __result, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - auto __n = __last - __first; - if (__n <= 0) - return __result; - - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _BidirectionalIterator>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); - - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __buf2 = __keep2(__result, __result + __n); - oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::__reverse_copy::difference_type>{__n}, - __n, __buf1.all_view(), __buf2.all_view()) - .wait(); - - return __result + __n; -} template _ForwardIterator @@ -2976,44 +1664,6 @@ class __rotate_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, _Iterator __last, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - auto __n = __last - __first; - if (__n <= 0) - return __first; - - using _Tp = typename ::std::iterator_traits<_Iterator>::value_type; - - auto __keep = 
oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); - auto __buf = __keep(__first, __last); - auto __temp_buf = oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _Tp>(__exec, __n); - - auto __temp_rng = - oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::write>(__temp_buf.get_buffer()); - - const auto __shift = __new_first - __first; - - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), - unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, - __buf.all_view(), __temp_rng); - - using _Function = __brick_move<_ExecutionPolicy>; - auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - - oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __brick, __n, __temp_rng, __buf.all_view()) - .wait(); - - return __first + (__last - __new_first); -} - template _Iterator __pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, @@ -3030,60 +1680,28 @@ __pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator auto __temp_buf = oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _Tp>(__exec, __n); auto __temp_rng = - oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::write>(__temp_buf.get_buffer()); - - const auto __shift = __new_first - __first; - oneapi::dpl::__par_backend_hetero::__parallel_for( - _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), - unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, - __buf.all_view(), __temp_rng); - - using _Function 
= __brick_move<_ExecutionPolicy>; - auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - - oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __brick, - __n, __temp_rng, __buf.all_view()) - .wait(); - - return __first + (__last - __new_first); -} - -//------------------------------------------------------------------------ -// rotate_copy -//------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __new_first, - _BidirectionalIterator __last, _ForwardIterator __result, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _BidirectionalIterator, _ForwardIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __n = __last - __first; - if (__n <= 0) - return __result; - - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _BidirectionalIterator>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); - auto __buf2 = __keep2(__result, __result + __n); + oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::write>(__temp_buf.get_buffer()); const auto __shift = __new_first - __first; - oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::__rotate_copy::difference_type>{__n, - __shift}, - __n, __buf1.all_view(), __buf2.all_view()) + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), + 
unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, + __buf.all_view(), __temp_rng); + + using _Function = __brick_move<_ExecutionPolicy>; + auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; + + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __brick, + __n, __temp_rng, __buf.all_view()) .wait(); - return __result + __n; + return __first + (__last - __new_first); } +//------------------------------------------------------------------------ +// rotate_copy +//------------------------------------------------------------------------ + template _ForwardIterator __pattern_rotate_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, @@ -3112,86 +1730,6 @@ __pattern_rotate_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Bid return __result + __n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_hetero_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsOpDifference) -{ - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _Size1; - typedef typename ::std::iterator_traits<_ForwardIterator2>::difference_type _Size2; - - const _Size1 __n1 = __last1 - __first1; - const _Size2 __n2 = __last2 - __first2; - - //Algo is based on the recommended approach of set_intersection algo for GPU: binary search + scan (copying by mask). 
- using _ReduceOp = ::std::plus<_Size1>; - using _Assigner = unseq_backend::__scan_assigner; - using _NoAssign = unseq_backend::__scan_no_assign; - using _MaskAssigner = unseq_backend::__mask_assigner<2>; - using _InitType = unseq_backend::__no_init_value<_Size1>; - using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; - - _ReduceOp __reduce_op; - _Assigner __assign_op; - _DataAcc __get_data_op; - unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 2> - __copy_by_mask_op; - unseq_backend::__brick_set_op<_ExecutionPolicy, _Compare, _Size1, _Size2, _IsOpDifference> __create_mask_op{ - __comp, __n1, __n2}; - - // temporary buffer to store boolean mask - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n1); - - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); - auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator2>(); - auto __buf2 = __keep2(__first2, __last2); - - auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _OutputIterator>(); - auto __buf3 = __keep3(__result, __result + __n1); - - auto __result_size = - __par_backend_hetero::__parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__ranges::make_zip_view( - __buf1.all_view(), __buf2.all_view(), - oneapi::dpl::__ranges::all_view( - __mask_buf.get_buffer())), - __buf3.all_view(), __reduce_op, _InitType{}, - // local scan - unseq_backend::__scan{ - __reduce_op, __get_data_op, __assign_op, _MaskAssigner{}, __create_mask_op}, - // scan between groups - unseq_backend::__scan{__reduce_op, __get_data_op, _NoAssign{}, __assign_op, - __get_data_op}, - // global scan - __copy_by_mask_op) - .get(); - - return __result + 
__result_size; -} - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - // intersection is empty - if (__first1 == __last1 || __first2 == __last2) - return __result; - - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_IntersectionTag()); -} - template _OutputIterator @@ -3279,35 +1817,6 @@ class __set_difference_copy_case_1 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - // {} \ {2}: the difference is empty - if (__first1 == __last1) - return __result; - - // {1} \ {}: the difference is {1} - if (__first2 == __last2) - { - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, - _ForwardIterator2, _OutputIterator>(); - - return oneapi::dpl::__internal::__pattern_walk2_brick( - __dispatch_tag, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_copy_case_1>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); - } - - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_DifferenceTag()); -} - template _OutputIterator @@ -3344,60 +1853,6 @@ class 
__set_union_copy_case_2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - if (__first1 == __last1 && __first2 == __last2) - return __result; - - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, - _ForwardIterator2, _OutputIterator>(); - - //{1} is empty - if (__first1 == __last1) - { - return oneapi::dpl::__internal::__pattern_walk2_brick( - __dispatch_tag, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_1>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); - } - - //{2} is empty - if (__first2 == __last2) - { - return oneapi::dpl::__internal::__pattern_walk2_brick( - __dispatch_tag, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); - } - - typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; - - // temporary buffer to store intermediate result - const auto __n2 = __last2 - __first2; - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __diff(__exec, __n2); - auto __buf = __diff.get(); - - //1. Calc difference {2} \ {1} - const auto __n_diff = oneapi::dpl::__internal::__pattern_hetero_set_op(__exec,__first2, __last2, __first1, __last1, - __buf,__comp, unseq_backend::_DifferenceTag() - ) - __buf; - //2. 
Merge {1} and the difference - const auto __dispatch_tag1 = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first1), - decltype(__buf), decltype(__result)>(); - return oneapi::dpl::__internal::__pattern_merge( - __dispatch_tag1, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __buf, __buf + __n_diff, __result, __comp); -} - template _OutputIterator @@ -3481,72 +1936,6 @@ class __set_symmetric_difference_phase_2 // 1. Calc difference {1} \ {2} // 2. Calc difference {2} \ {1} // 3. Merge the differences -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - if (__first1 == __last1 && __first2 == __last2) - return __result; - - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, - _ForwardIterator2, _OutputIterator>(); - - //{1} is empty - if (__first1 == __last1) - { - return oneapi::dpl::__internal::__pattern_walk2_brick( - __dispatch_tag, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_1>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); - } - - //{2} is empty - if (__first2 == __last2) - { - return oneapi::dpl::__internal::__pattern_walk2_brick( - __dispatch_tag, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_2>( - ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); - } - - typedef typename 
::std::iterator_traits<_OutputIterator>::value_type _ValueType; - - // temporary buffers to store intermediate result - const auto __n1 = __last1 - __first1; - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __diff_1(__exec, __n1); - auto __buf_1 = __diff_1.get(); - const auto __n2 = __last2 - __first2; - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __diff_2(__exec, __n2); - auto __buf_2 = __diff_2.get(); - - //1. Calc difference {1} \ {2} - const auto __n_diff_1 = - oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), - __first1, __last1, __first2, __last2, __buf_1, __comp, unseq_backend::_DifferenceTag()) - - __buf_1; - - //2. Calc difference {2} \ {1} - const auto __n_diff_2 = - oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), - __first2, __last2, __first1, __last1, __buf_2, __comp, unseq_backend::_DifferenceTag()) - - __buf_2; - - //3. Merge the differences - constexpr auto __dispatch_tag1 = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_1), - decltype(__buf_2), decltype(__result)>(); - return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag1, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, - __buf_1 + __n_diff_1, __buf_2, __buf_2 + __n_diff_2, __result, - __comp); -} - template _OutputIterator @@ -3618,56 +2007,10 @@ class __shift_left_right { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__internal::__difference_t<_Range> __n) -{ - //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. 
- using _DiffType = oneapi::dpl::__internal::__difference_t<_Range>; - _DiffType __size = __rng.size(); - - assert(__n > 0 && __n < __size); - - _DiffType __mid = __size / 2 + __size % 2; - _DiffType __size_res = __size - __n; - - //TODO: required to implement correct tag selection here - using __backend_tag = oneapi::dpl::__internal::__device_backend_tag; - - //1. n >= size/2; 'size - _n' parallel copying - if (__n >= __mid) - { - using _Function = __brick_move<_ExecutionPolicy>; - auto __brick = oneapi::dpl::unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - - //TODO: to consider use just "read" access mode for a source range and just "write" - for a destination range. - auto __src = oneapi::dpl::__ranges::drop_view_simple<_Range, _DiffType>(__rng, __n); - auto __dst = oneapi::dpl::__ranges::take_view_simple<_Range, _DiffType>(__rng, __size_res); - - oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), - __brick, __size_res, __src, __dst) - .wait(); - } - else //2. 
n < size/2; 'n' parallel copying - { - auto __brick = unseq_backend::__brick_shift_left<_ExecutionPolicy, _DiffType>{__size, __n}; - oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__shift_left_right>( - ::std::forward<_ExecutionPolicy>(__exec)), - __brick, __n, __rng) - .wait(); - } - - return __size_res; -} - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_shift_left(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - typename ::std::iterator_traits<_Iterator>::difference_type __n, /*vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) +template +_Iterator +__pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + typename ::std::iterator_traits<_Iterator>::difference_type __n) { //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. auto __size = __last - __first; @@ -3684,29 +2027,6 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __l return __first + __res; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_shift_right(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - typename ::std::iterator_traits<_Iterator>::difference_type __n, /*vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) -{ - //If (n > 0 && n < m), returns first + n. Otherwise, if n > 0, returns last. Otherwise, returns first. - auto __size = __last - __first; - if (__n <= 0) - return __first; - if (__n >= __size) - return __last; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); - auto __buf = __keep(__first, __last); - - //A shift right is the shift left with a reverse logic. 
- auto __rng = oneapi::dpl::__ranges::reverse_view_simple{__buf.all_view()}; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __rng, __n); - - return __last - __res; -} - template _Iterator __pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, @@ -3729,26 +2049,6 @@ __pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec return __last - __res; } -template -_Iterator -__pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - typename ::std::iterator_traits<_Iterator>::difference_type __n) -{ - //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. - auto __size = __last - __first; - if (__n <= 0) - return __last; - if (__n >= __size) - return __first; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); - auto __buf = __keep(__first, __last); - - auto __res = - oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __n); - return __first + __res; -} - } // namespace __internal } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index dde4eb10f58..0a79297e7b8 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -37,35 +37,6 @@ namespace __internal // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 
__first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - if (__first1 == __last1) - return __init; - - using _Functor = unseq_backend::walk_n<_ExecutionPolicy, _BinaryOperation2>; - using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; - - auto __n = __last1 - __first1; - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator1>(); - auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator2>(); - auto __buf2 = __keep2(__first2, __first2 + __n); - - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, - ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, - unseq_backend::__init_value<_RepackedTp>{__init}, // initial value - __buf1.all_view(), __buf2.all_view()) - .get(); -} - template _Tp @@ -99,30 +70,6 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, - _BinaryOperation __binary_op, _UnaryOperation __unary_op, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) -{ - if (__first == __last) - return __init; - - using _Functor = unseq_backend::walk_n<_ExecutionPolicy, _UnaryOperation>; - using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator>(); - auto __buf = __keep(__first, __last); - - 
return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, - ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, - unseq_backend::__init_value<_RepackedTp>{__init}, // initial value - __buf.all_view()) - .get(); -} - template _Tp @@ -175,85 +122,6 @@ __iterators_possibly_equal(const sycl_iterator<_Mode1, _T, _Allocator>& __it1, } #endif // _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) -{ - if (__first == __last) - return __result; - - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - const auto __n = __last - __first; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf1 = __keep1(__first, __last); - - // This is a temporary workaround for an in-place exclusive scan while the SYCL backend scan pattern is not fixed. 
- const bool __is_scan_inplace_exclusive = __n > 1 && !_Inclusive{} && __iterators_possibly_equal(__first, __result); - if (!__is_scan_inplace_exclusive) - { - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); - auto __buf2 = __keep2(__result, __result + __n); - - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, - __unary_op, __init, __binary_op, _Inclusive{}) - .wait(); - } - else - { - assert(__n > 1); - assert(!_Inclusive{}); - assert(__iterators_possibly_equal(__first, __result)); - - using _Type = typename _InitType::__value_type; - - auto __policy = - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)); - using _NewExecutionPolicy = decltype(__policy); - - // Create temporary buffer - oneapi::dpl::__par_backend_hetero::__buffer<_NewExecutionPolicy, _Type> __tmp_buf(__policy, __n); - auto __first_tmp = __tmp_buf.get(); - auto __last_tmp = __first_tmp + __n; - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); - auto __buf2 = __keep2(__first_tmp, __last_tmp); - - // Run main algorithm and save data into temporary buffer - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__backend_tag{}, __policy, __buf1.all_view(), - __buf2.all_view(), __n, __unary_op, __init, - __binary_op, _Inclusive{}) - .wait(); - - // Move data from temporary buffer into results - oneapi::dpl::__internal::__pattern_walk2_brick(__dispatch_tag, ::std::move(__policy), __first_tmp, __last_tmp, - __result, - oneapi::dpl::__internal::__brick_move<_NewExecutionPolicy>{}); - - //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer - } - - return __result + __n; -} - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; - using _InitType = unseq_backend::__init_value<_RepackedType>; - - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); -} - template _Iterator2 @@ -334,21 +202,6 @@ __pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e } // scan without initial element -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) -{ - using _Type = typename ::std::iterator_traits<_Iterator1>::value_type; - using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; - using _InitType = unseq_backend::__no_init_value<_RepackedType>; - - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{}, __binary_op, _Inclusive{}); -} - template _Iterator2 @@ -374,65 +227,6 @@ struct adjacent_difference_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __d_first, _BinaryOperation 
__op, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) -{ - auto __n = __last - __first; - if (__n <= 0) - return __d_first; - - const auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - using _It1ValueT = typename ::std::iterator_traits<_ForwardIterator1>::value_type; - using _It2ValueTRef = typename ::std::iterator_traits<_ForwardIterator2>::reference; - - _ForwardIterator2 __d_last = __d_first + __n; - -#if !__SYCL_UNNAMED_LAMBDA__ - // if we have the only element, just copy it according to the specification - if (__n == 1) - { - return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op]() { - auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( - ::std::forward<_ExecutionPolicy>(__exec)); - - __internal::__pattern_walk2_brick(__dispatch_tag, __wrapped_policy, __first, __last, __d_first, - __internal::__brick_copy{}); - - return __d_last; - }); - } - else -#endif - { - return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op, __n]() { - auto __fn = [__op](_It1ValueT __in1, _It1ValueT __in2, _It2ValueTRef __out1) { - __out1 = __op(__in2, __in1); // This move assignment is allowed by the C++ standard draft N4810 - }; - - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator2>(); - auto __buf2 = __keep2(__d_first, __d_last); - - using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; - - oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, __exec, _Function{__fn}, __n, - __buf1.all_view(), __buf2.all_view()) - .wait(); - - return __d_last; - }); - } -} - 
template _ForwardIterator2 From c3bbe584729397edbba7e1eaf371dee092296569 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 09:45:46 +0100 Subject: [PATCH 300/566] Remove old implementations with __enable_if_device_execution_policy --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 355 ------------------ 1 file changed, 355 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index ebbe14843c8..60de6356b37 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -773,65 +773,6 @@ __parallel_transform_scan_base(oneapi::dpl::__internal::__device_backend_tag, _E __binary_op, __init, __local_scan, __group_scan, __global_scan); } -template = 0> -auto -__parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) -{ - using _Type = typename _InitType::__value_type; - - // Next power of 2 greater than or equal to __n - auto __n_uniform = __n; - if ((__n_uniform & (__n_uniform - 1)) != 0) - __n_uniform = oneapi::dpl::__internal::__dpl_bit_floor(__n) << 1; - - // Pessimistically only use half of the memory to take into account memory used by compiled kernel - const ::std::size_t __max_slm_size = - __exec.queue().get_device().template get_info() / 2; - const auto __req_slm_size = sizeof(_Type) * __n_uniform; - - constexpr int __single_group_upper_limit = 16384; - - constexpr bool __can_use_group_scan = unseq_backend::__has_known_identity<_BinaryOperation, _Type>::value; - if constexpr (__can_use_group_scan) - { - if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size) - { - return __parallel_transform_scan_single_group( - std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), - 
::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); - } - } - - // Either we can't use group scan or this input is too big for one workgroup - using _Assigner = unseq_backend::__scan_assigner; - using _NoAssign = unseq_backend::__scan_no_assign; - using _UnaryFunctor = unseq_backend::walk_n<_ExecutionPolicy, _UnaryOperation>; - using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; - - _Assigner __assign_op; - _NoAssign __no_assign_op; - _NoOpFunctor __get_data_op; - - return __future( - __parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), - ::std::forward<_Range2>(__out_rng), __binary_op, __init, - // local scan - unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, - _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, - __assign_op, __get_data_op}, - // scan between groups - unseq_backend::__scan>{ - __binary_op, _NoOpFunctor{}, __no_assign_op, __assign_op, __get_data_op}, - // global scan - unseq_backend::__global_scan_functor<_Inclusive, _BinaryOperation, _InitType>{__binary_op, __init}) - .event()); -} - template auto @@ -933,48 +874,6 @@ struct __invoke_single_group_copy_if } }; -template = 0> -auto -__parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) -{ - using _ReduceOp = ::std::plus<_Size>; - using _Assigner = unseq_backend::__scan_assigner; - using _NoAssign = unseq_backend::__scan_no_assign; - using _MaskAssigner = unseq_backend::__mask_assigner<1>; - using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; - using _InitType = unseq_backend::__no_init_value<_Size>; - - _Assigner __assign_op; - _ReduceOp __reduce_op; - _DataAcc __get_data_op; - _MaskAssigner __add_mask_op; - - // temporary buffer 
to store boolean mask - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); - - return __parallel_transform_scan_base( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__ranges::make_zip_view( - ::std::forward<_InRng>(__in_rng), - oneapi::dpl::__ranges::all_view( - __mask_buf.get_buffer())), - ::std::forward<_OutRng>(__out_rng), __reduce_op, _InitType{}, - // local scan - unseq_backend::__scan{__reduce_op, __get_data_op, __assign_op, - __add_mask_op, __create_mask_op}, - // scan between groups - unseq_backend::__scan{__reduce_op, __get_data_op, _NoAssign{}, __assign_op, - __get_data_op}, - // global scan - __copy_by_mask_op); -} - template auto @@ -1016,50 +915,6 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPo __copy_by_mask_op); } -template = 0> -auto -__parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) -{ - using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; - - // Next power of 2 greater than or equal to __n - auto __n_uniform = ::oneapi::dpl::__internal::__dpl_bit_ceil(static_cast<::std::make_unsigned_t<_Size>>(__n)); - - // Pessimistically only use half of the memory to take into account memory used by compiled kernel - const ::std::size_t __max_slm_size = - __exec.queue().get_device().template get_info() / 2; - - // The kernel stores n integers for the predicate and another n integers for the offsets - const auto __req_slm_size = sizeof(::std::uint16_t) * __n_uniform * 2; - - constexpr ::std::uint16_t __single_group_upper_limit = 16384; - - ::std::size_t __max_wg_size = oneapi::dpl::__internal::__max_work_group_size(__exec); - - if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size && - __max_wg_size >= _SingleGroupInvoker::__targeted_wg_size) - { - using _SizeBreakpoints = - ::std::integer_sequence<::std::uint16_t, 16, 32, 64, 
128, 256, 512, 1024, 2048, 4096, 8192, 16384>; - - return __par_backend_hetero::__static_monotonic_dispatcher<_SizeBreakpoints>::__dispatch( - _SingleGroupInvoker{}, __n, ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __pred); - } - else - { - using _ReduceOp = ::std::plus<_Size>; - using CreateOp = unseq_backend::__create_mask<_Pred, _Size>; - using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, - /*inclusive*/ ::std::true_type, 1>; - - return __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, CreateOp{__pred}, CopyOp{}); - } -} - template auto __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, @@ -1238,109 +1093,6 @@ struct __early_exit_find_or // parallel_find_or - sync pattern //------------------------------------------------------------------------ -// Base pattern for __parallel_or and __parallel_find. The execution depends on tag type _BrickTag. -template -oneapi::dpl::__internal::__enable_if_device_execution_policy< - _ExecutionPolicy, - ::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t< - typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>>> -__parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... 
__rngs) -{ - using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; - using _AtomicType = typename _BrickTag::_AtomicType; - using _FindOrKernel = - oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_generator<__find_or_kernel, _CustomName, _Brick, - _BrickTag, _Ranges...>; - - constexpr bool __or_tag_check = ::std::is_same_v<_BrickTag, __parallel_or_tag>; - auto __rng_n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); - assert(__rng_n > 0); - - // TODO: find a way to generalize getting of reliable work-group size - auto __wgroup_size = oneapi::dpl::__internal::__max_work_group_size(__exec); -#if _ONEDPL_COMPILE_KERNEL - auto __kernel = __internal::__kernel_compiler<_FindOrKernel>::__compile(__exec); - __wgroup_size = ::std::min(__wgroup_size, oneapi::dpl::__internal::__kernel_work_group_size(__exec, __kernel)); -#endif - auto __max_cu = oneapi::dpl::__internal::__max_compute_units(__exec); - - auto __n_groups = (__rng_n - 1) / __wgroup_size + 1; - // TODO: try to change __n_groups with another formula for more perfect load balancing - __n_groups = ::std::min(__n_groups, decltype(__n_groups)(__max_cu)); - - auto __n_iter = (__rng_n - 1) / (__n_groups * __wgroup_size) + 1; - - _PRINT_INFO_IN_DEBUG_MODE(__exec, __wgroup_size, __max_cu); - - _AtomicType __init_value = _BrickTag::__init_value(__rng_n); - auto __result = __init_value; - - auto __pred = oneapi::dpl::__par_backend_hetero::__early_exit_find_or<_ExecutionPolicy, _Brick>{__f}; - - // scope is to copy data back to __result after destruction of temporary sycl:buffer - { - auto __temp = sycl::buffer<_AtomicType, 1>(&__result, 1); // temporary storage for global atomic - - // main parallel_for - __exec.queue().submit([&](sycl::handler& __cgh) { - oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); - auto __temp_acc = __temp.template get_access(__cgh); - - // create local accessor to connect atomic with - __dpl_sycl::__local_accessor<_AtomicType> 
__temp_local(1, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT - __cgh.use_kernel_bundle(__kernel.get_kernel_bundle()); -#endif - __cgh.parallel_for<_FindOrKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif - sycl::nd_range(sycl::range(__n_groups * __wgroup_size), - sycl::range(__wgroup_size)), - [=](sycl::nd_item __item_id) { - auto __local_idx = __item_id.get_local_id(0); - - __dpl_sycl::__atomic_ref<_AtomicType, sycl::access::address_space::global_space> __found( - *__dpl_sycl::__get_accessor_ptr(__temp_acc)); - __dpl_sycl::__atomic_ref<_AtomicType, sycl::access::address_space::local_space> __found_local( - *__dpl_sycl::__get_accessor_ptr(__temp_local)); - - // 1. Set initial value to local atomic - if (__local_idx == 0) - __found_local.store(__init_value); - __dpl_sycl::__group_barrier(__item_id); - - // 2. Find any element that satisfies pred and set local atomic value to global atomic - constexpr auto __comp = typename _BrickTag::_Compare{}; - __pred(__item_id, __n_iter, __wgroup_size, __comp, __found_local, __brick_tag, __rngs...); - __dpl_sycl::__group_barrier(__item_id); - - // Set local atomic value to global atomic - if (__local_idx == 0 && __comp(__found_local.load(), __found.load())) - { - if constexpr (__or_tag_check) - __found.store(1); - else - { - for (auto __old = __found.load(); __comp(__found_local.load(), __old); - __old = __found.load()) - { - __found.compare_exchange_strong(__old, __found_local.load()); - } - } - } - }); - }); - //The end of the scope - a point of synchronization (on temporary sycl buffer destruction) - } - - if constexpr (__or_tag_check) - return __result; - else - return __result != __init_value ? __result : __rng_n; -} - // Base pattern for __parallel_or and __parallel_find. The execution depends on tag type _BrickTag. 
template ::std::conditional_t< @@ -1452,26 +1204,6 @@ class __or_policy_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf = __keep(__first, __last); - auto __s_keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); - auto __s_buf = __s_keep(__s_first, __s_last); - - return oneapi::dpl::__par_backend_hetero::__parallel_find_or( - __backend_tag{}, - __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, - __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); -} - template bool __parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, @@ -1488,25 +1220,6 @@ __parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); } -// Special overload for single sequence cases. -// TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of -// backend code. 
-template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf = __keep(__first, __last); - - return oneapi::dpl::__par_backend_hetero::__parallel_find_or( - __backend_tag{}, - __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, - __parallel_or_tag{}, __buf.all_view()); -} - // Special overload for single sequence cases. // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. @@ -1533,29 +1246,6 @@ class __find_policy_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, _Iterator1> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f, _IsFirst) -{ - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf = __keep(__first, __last); - auto __s_keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); - auto __s_buf = __s_keep(__s_first, __s_last); - - using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, - __parallel_find_backward_tag>; - return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( - __backend_tag{}, - 
__par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( - ::std::forward<_ExecutionPolicy>(__exec)), - __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); -} - template _Iterator1 __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, @@ -1578,25 +1268,6 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy& // Special overload for single sequence cases. // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. -template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, _Iterator> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) -{ - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); - auto __buf = __keep(__first, __last); - - using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, - __parallel_find_backward_tag>; - return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( - __backend_tag{}, - __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( - ::std::forward<_ExecutionPolicy>(__exec)), - __f, _TagType{}, __buf.all_view()); -} - template _Iterator __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, @@ -2135,18 +1806,6 @@ struct __is_radix_sort_usable_for_type }; #if _USE_RADIX_SORT -template > && - __is_radix_sort_usable_for_type, _Compare>::value, - int> = 0> -auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare, _Proj __proj) -{ - return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( - 
::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); -} - template < typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, ::std::enable_if_t< @@ -2161,20 +1820,6 @@ __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _Execution } #endif -template > && - !__is_radix_sort_usable_for_type, _Compare>::value, - int> = 0> -auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) -{ - auto __cmp_f = [__comp, __proj](const auto& __a, const auto& __b) mutable { - return __comp(__proj(__a), __proj(__b)); - }; - return __parallel_sort_impl(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); -} - template < typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, ::std::enable_if_t< From 0a070ed5f7d19d52d005048996c96d166b007a60 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Thu, 15 Feb 2024 19:12:26 +0100 Subject: [PATCH 301/566] Remove old implementations with __enable_if_fpga_execution_policy --- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 126 ------------------ .../dpcpp/parallel_backend_sycl_reduce.h | 101 -------------- 2 files changed, 227 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 8d563fd1cd1..f0a69e9f0c1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -93,18 +93,6 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& _ // parallel_transform_reduce //------------------------------------------------------------------------ -template = 0, - typename... _Ranges> -auto -__parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... 
__rngs) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_Tp, _Commutative>( - __exec.__device_policy(), __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); -} - template auto @@ -120,19 +108,6 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__fpga_backend_tag, _Execut // parallel_transform_scan //------------------------------------------------------------------------ -template = 0> -auto -__parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__in_rng), - ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); -} - template auto @@ -159,17 +134,6 @@ __parallel_transform_scan_base(oneapi::dpl::__internal::__fpga_backend_tag, _Exe ::std::forward<_Range2>(__rng2), __binary_op, __init, __local_scan, __group_scan, __global_scan); } -template = 0> -auto -__parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_copy_if( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, __pred); -} - template auto __parallel_copy_if(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, @@ -181,19 +145,6 @@ __parallel_copy_if(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy ::std::forward<_OutRng>(__out_rng), 
__n, __pred); } -template = 0> -auto -__parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_scan_copy( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, __create_mask_op, __copy_by_mask_op); -} - template auto @@ -209,18 +160,6 @@ __parallel_scan_copy(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPoli //------------------------------------------------------------------------ // __parallel_find_or //----------------------------------------------------------------------- -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy< - _ExecutionPolicy, - ::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t< - typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>>> -__parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... 
__rngs) -{ - return oneapi::dpl::__par_backend_hetero::__parallel_find_or(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __f, __brick_tag, - ::std::forward<_Ranges>(__rngs)...); -} template ::std::conditional_t< @@ -259,14 +198,6 @@ __parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __ __s_last, __f); } -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __f); -} - template bool __parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, @@ -281,18 +212,6 @@ __parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __ // parallel_find //----------------------------------------------------------------------- -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _Iterator1> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) -{ - - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __first, __last, __s_first, - __s_last, __f, __is_first); -} - template _Iterator1 __parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, @@ -304,15 +223,6 @@ __parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __s_last, __f, __is_first); } -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _Iterator> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator 
__first, _Iterator __last, _Brick __f, _IsFirst __is_first) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(__exec.__device_policy(), __first, __last, __f, - __is_first); -} - template _Iterator __parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, @@ -327,20 +237,6 @@ __parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& // parallel_merge //----------------------------------------------------------------------- -template -auto -__parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) - -> oneapi::dpl::__internal::__enable_if_fpga_execution_policy< - _ExecutionPolicy, decltype(oneapi::dpl::__par_backend_hetero::__parallel_merge( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp))> -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_merge( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); -} - template auto __parallel_merge(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, @@ -359,17 +255,6 @@ __parallel_merge(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& // parallel_stable_sort //----------------------------------------------------------------------- -template = 0> -auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), - ::std::forward<_Range>(__rng), __comp, __proj); -} - 
template auto __parallel_stable_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, @@ -386,17 +271,6 @@ __parallel_stable_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPo //----------------------------------------------------------------------- // TODO: check if it makes sense to move these wrappers out of backend to a common place -template = 0> -auto -__parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, - _Compare __comp) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_partial_sort( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __first, __mid, __last, __comp); -} - template auto __parallel_partial_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index f1f8c04cac2..28b48333d52 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -411,107 +411,6 @@ struct __parallel_transform_reduce_impl // Mid-sized arrays use two tree reductions with independent __iters_per_work_item. // Big arrays are processed with a recursive tree reduction. __work_group_size * __iters_per_work_item elements are // reduced in each step. -template = 0, - typename... _Ranges> -auto -__parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) -{ - auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); - assert(__n > 0); - - // Get the work group size adjusted to the local memory limit. 
- // Pessimistically double the memory requirement to take into account memory used by compiled kernel. - // TODO: find a way to generalize getting of reliable work-group size. - ::std::size_t __work_group_size = oneapi::dpl::__internal::__slm_adjusted_work_group_size(__exec, sizeof(_Tp) * 2); - - // Use single work group implementation if array < __work_group_size * __iters_per_work_item. - if (__work_group_size >= 256) - { - if (__n <= 256) - { - return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 512) - { - return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 1024) - { - return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 2048) - { - return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 4096) - { - return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 8192) - { - return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - - // Use two-step tree reduction. - // First step reduces __work_group_size * __iters_per_work_item_device_kernel elements. 
- // Second step reduces __work_group_size * __iters_per_work_item_work_group_kernel elements. - else if (__n <= 2097152) - { - return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 4194304) - { - return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 8388608) - { - return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 16777216) - { - return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 33554432) - { - return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - else if (__n <= 67108864) - { - return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); - } - } - // Otherwise use a recursive tree reduction. 
- return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( - ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); -} - template auto From 9f4ec47c26648f5aff6178f4e663b70748283881 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:03:53 +0100 Subject: [PATCH 302/566] Fix error: remove extra __select_backend calls --- .../oneapi/dpl/internal/binary_search_impl.h | 4 +- include/oneapi/dpl/pstl/algorithm_impl.h | 51 +++++-------------- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 12 +---- .../dpl/pstl/hetero/numeric_impl_hetero.h | 8 ++- include/oneapi/dpl/pstl/numeric_impl.h | 4 +- 5 files changed, 21 insertions(+), 58 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 8a6f6b2a910..4e4a17b7f21 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -117,9 +117,7 @@ OutputIterator lower_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using __backend_tag = typename decltype(__tag)::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index c5d96eacaad..d266c94f232 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2039,16 +2039,14 @@ __pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo template bool -__pattern_is_partitioned(__parallel_tag<_IsVector>, 
_ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_is_partitioned(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { //trivial pre-checks if (__first == __last) return true; - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using __backend_tag = typename decltype(__tag)::__backend_tag; return __internal::__except_handler([&]() { // State of current range: @@ -2171,9 +2169,11 @@ __pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forward template _RandomAccessIterator -__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename decltype(__tag)::__backend_tag; + // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) struct _PartitionRange @@ -2201,11 +2201,6 @@ __pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando // then we should swap the false part of left range and last part of true part of right range else if (__size2 > __size1) { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), - decltype(__val1.__pivot + __size1)>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - __par_backend::__parallel_for( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, [__val1, __val2, __size1](_RandomAccessIterator __i, _RandomAccessIterator __j) { @@ -2217,11 +2212,6 @@ __pattern_partition(__parallel_tag<_IsVector>, 
_ExecutionPolicy&& __exec, _Rando // else we should swap the first part of false part of left range and true part of right range else { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__val1.__pivot), - decltype(__val1.__pivot + __size2)>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - __par_backend::__parallel_for( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, [__val1, __val2](_RandomAccessIterator __i, _RandomAccessIterator __j) { @@ -2231,10 +2221,6 @@ __pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando } }; - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - _PartitionRange __result = __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, @@ -2283,9 +2269,11 @@ __pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __fi template _RandomAccessIterator -__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_stable_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename decltype(__tag)::__backend_tag; + // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) struct _PartitionRange @@ -2317,10 +2305,6 @@ __pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, } }; - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag 
= typename decltype(__dispatch_tag)::__backend_tag; - _PartitionRange __result = __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [&__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, @@ -2684,16 +2668,14 @@ __pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _For template _RandomAccessIterator -__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_adjacent_find(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred, _Semantic __or_semantic) { + using __backend_tag = typename decltype(__tag)::__backend_tag; + if (__last - __first < 2) return __last; - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, @@ -4290,20 +4272,15 @@ __pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Forw template _BidirectionalIterator -__pattern_shift_right(_Tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, +__pattern_shift_right(_Tag __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) { static_assert(__is_backend_tag_v<_Tag>); using _ReverseIterator = typename ::std::reverse_iterator<_BidirectionalIterator>; - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(_ReverseIterator(__last)), - decltype(_ReverseIterator(__first))>(); - - auto __res = - 
oneapi::dpl::__internal::__pattern_shift_left(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), - _ReverseIterator(__last), _ReverseIterator(__first), __n); + auto __res = oneapi::dpl::__internal::__pattern_shift_left( + __tag, ::std::forward<_ExecutionPolicy>(__exec), _ReverseIterator(__last), _ReverseIterator(__first), __n); return __res.base(); } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index c451be81c52..500a6dcbaac 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1037,12 +1037,7 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), - decltype(__copy_first)>(); - - auto __copy_last = - __pattern_copy_if(__dispatch_tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); + auto __copy_last = __pattern_copy_if(__tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); constexpr auto __dispatch_tag1 = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), @@ -1370,11 +1365,8 @@ _Iterator __pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred) { - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator>(); - //TODO: consider nonstable approaches - return __pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __pred); + return __pattern_stable_partition(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); } 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 0a79297e7b8..914c9de7012 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -125,15 +125,14 @@ __iterators_possibly_equal(const sycl_iterator<_Mode1, _T, _Allocator>& __it1, template _Iterator2 -__pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, +__pattern_transform_scan_base(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) { if (__first == __last) return __result; - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _Iterator1, _Iterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using __backend_tag = typename decltype(__tag)::__backend_tag; const auto __n = __last - __first; @@ -178,8 +177,7 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex .wait(); // Move data from temporary buffer into results - oneapi::dpl::__internal::__pattern_walk2_brick(__dispatch_tag, ::std::move(__policy), __first_tmp, __last_tmp, - __result, + oneapi::dpl::__internal::__pattern_walk2_brick(__tag, ::std::move(__policy), __first_tmp, __last_tmp, __result, oneapi::dpl::__internal::__brick_move<_NewExecutionPolicy>{}); //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index b875883a598..bb64049ace8 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -390,9 +390,7 @@ __pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy& _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, _BinaryOperation __op) { - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + using __backend_tag = typename decltype(__tag)::__backend_tag; assert(__first != __last); typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; From ff94f67e8b03dcadb892d0edd990cca667b03ee5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:07:25 +0100 Subject: [PATCH 303/566] Fix self-review comment: move 'using __backend_tag = ' at first line inside function --- include/oneapi/dpl/pstl/algorithm_impl.h | 20 +++++++++---------- .../dpl/pstl/hetero/numeric_impl_hetero.h | 4 ++-- include/oneapi/dpl/pstl/numeric_impl.h | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index d266c94f232..15e065e0065 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2042,12 +2042,12 @@ bool __pattern_is_partitioned(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename decltype(__tag)::__backend_tag; + //trivial pre-checks if (__first == __last) return true; - using __backend_tag = typename decltype(__tag)::__backend_tag; - return 
__internal::__except_handler([&]() { // State of current range: // broken - current range is not partitioned by pred @@ -2529,12 +2529,12 @@ void __pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + const auto __n = __middle - __first; if (__n == 0) return; - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - __except_handler([&]() { __par_backend::__parallel_stable_sort( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, @@ -3302,10 +3302,10 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, _SetUnionOp __set_union_op) { - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; - using __backend_tag = typename decltype(__tag)::__backend_tag; + typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3899,12 +3899,12 @@ _RandomAccessIterator __pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // a trivial case pre-check if (__last - __first < 2) return __first; - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, @@ -3966,12 +3966,12 @@ ::std::pair<_RandomAccessIterator, _RandomAccessIterator> 
__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // a trivial case pre-check if (__last - __first < 2) return ::std::make_pair(__first, __first); - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __internal::__except_handler([&]() { typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 914c9de7012..cd38e03652e 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -129,11 +129,11 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) { + using __backend_tag = typename decltype(__tag)::__backend_tag; + if (__first == __last) return __result; - using __backend_tag = typename decltype(__tag)::__backend_tag; - const auto __n = __last - __first; auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index bb64049ace8..ea6cf5e4601 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -287,6 +287,8 @@ __pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 
_DifferenceType __n = __last - __first; @@ -295,8 +297,6 @@ __pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ return __result; } - using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - return __internal::__except_handler([&]() { __par_backend::__parallel_strict_scan( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, From 446a48bc874ebdbe1bccd7ce2fdb7daa44de1da3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:14:44 +0100 Subject: [PATCH 304/566] Fix self-review comment: define 'using __backend_tag = ' by the same way in all code --- .../oneapi/dpl/internal/binary_search_impl.h | 12 ++++----- include/oneapi/dpl/pstl/algorithm_impl.h | 26 +++++++++---------- .../experimental/internal/for_loop_impl.h | 8 +++--- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 4 +-- .../dpl/pstl/hetero/numeric_impl_hetero.h | 2 +- include/oneapi/dpl/pstl/numeric_impl.h | 4 +-- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 4e4a17b7f21..0c48edcd468 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -114,10 +114,10 @@ binary_search_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 e template OutputIterator -lower_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 start, InputIterator1 end, +lower_bound_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -146,10 +146,10 @@ 
lower_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator template OutputIterator -upper_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 start, InputIterator1 end, +upper_bound_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -178,10 +178,10 @@ upper_bound_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator template OutputIterator -binary_search_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 start, InputIterator1 end, +binary_search_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 15e065e0065..2d6569afc2c 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1455,10 +1455,10 @@ __pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIte // So, a caller passes _CalcMask brick into remove_elements. 
template _ForwardIterator -__remove_elements(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, +__remove_elements(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _CalcMask __calc_mask) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; typedef typename ::std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _Tp; @@ -2039,10 +2039,10 @@ __pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo template bool -__pattern_is_partitioned(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; //trivial pre-checks if (__first == __last) @@ -2169,10 +2169,10 @@ __pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Forward template _RandomAccessIterator -__pattern_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) @@ -2269,10 +2269,10 @@ __pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __fi template _RandomAccessIterator 
-__pattern_stable_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) @@ -2668,10 +2668,10 @@ __pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _For template _RandomAccessIterator -__pattern_adjacent_find(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred, _Semantic __or_semantic) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; if (__last - __first < 2) return __last; @@ -3205,11 +3205,11 @@ inline constexpr auto __set_algo_cut_off = 1000; template _OutputIterator -__parallel_set_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, +__parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; typedef typename ::std::iterator_traits<_OutputIterator>::value_type _T; @@ -3302,7 +3302,7 @@ 
__parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, _SetUnionOp __set_union_op) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index e86beeb276d..13d72c4fab4 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -390,10 +390,10 @@ __pattern_for_loop(_Tag, _ExecutionPolicy&&, _Ip __first, _Ip __last, _Function template void -__pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, +__pattern_for_loop_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, __single_stride_type, _Rest&&... __rest) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; using __pack_type = __reduction_pack<_Rest...>; @@ -427,10 +427,10 @@ __pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, template void -__pattern_for_loop_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, +__pattern_for_loop_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Function __f, _Sp __stride, _Rest&&... 
__rest) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; using __pack_type = __reduction_pack<_Rest...>; diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 500a6dcbaac..173e7bf4891 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1598,9 +1598,9 @@ __pattern_nth_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec template void -__pattern_reverse(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last) +__pattern_reverse(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; auto __n = __last - __first; if (__n <= 0) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index cd38e03652e..cabcc10eecd 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -129,7 +129,7 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; if (__first == __last) return __result; diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index ea6cf5e4601..e5b7180e510 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -386,11 +386,11 @@ __pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, 
_ForwardIterator __first template _RandomAccessIterator2 -__pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, +__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, _BinaryOperation __op) { - using __backend_tag = typename decltype(__tag)::__backend_tag; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; assert(__first != __last); typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; From b808d267dfd08088564d567adec7900e5f0f23db Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:19:35 +0100 Subject: [PATCH 305/566] Fix self-review comment: revert extra changes and incorrectly merged code --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 60de6356b37..aba0a6ada17 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -268,8 +268,7 @@ auto __parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... 
__rngs) { - using _Policy = ::std::decay_t<_ExecutionPolicy>; - using _CustomName = typename _Policy::kernel_name; + using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<_CustomName>; return __parallel_for_submitter<_ForKernel>()(oneapi::dpl::__internal::__device_backend_tag{}, From ccfde944fc75c4d0efc9ec6ca051fa3ac1f41f84 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:28:41 +0100 Subject: [PATCH 306/566] include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h - fix error: restore __pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__internal::__difference_t<_Range> __n) --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 173e7bf4891..47432beca9d 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1999,6 +1999,48 @@ class __shift_left_right { }; +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_shift_left(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range __rng, + oneapi::dpl::__internal::__difference_t<_Range> __n) +{ + //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. + using _DiffType = oneapi::dpl::__internal::__difference_t<_Range>; + _DiffType __size = __rng.size(); + + assert(__n > 0 && __n < __size); + + _DiffType __mid = __size / 2 + __size % 2; + _DiffType __size_res = __size - __n; + + //1. 
n >= size/2; 'size - _n' parallel copying + if (__n >= __mid) + { + using _Function = __brick_move<_ExecutionPolicy>; + auto __brick = oneapi::dpl::unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; + + //TODO: to consider use just "read" access mode for a source range and just "write" - for a destination range. + auto __src = oneapi::dpl::__ranges::drop_view_simple<_Range, _DiffType>(__rng, __n); + auto __dst = oneapi::dpl::__ranges::take_view_simple<_Range, _DiffType>(__rng, __size_res); + + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __brick, __size_res, __src, __dst) + .wait(); + } + else //2. n < size/2; 'n' parallel copying + { + auto __brick = unseq_backend::__brick_shift_left<_ExecutionPolicy, _DiffType>{__size, __n}; + oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__shift_left_right>( + ::std::forward<_ExecutionPolicy>(__exec)), + __brick, __n, __rng) + .wait(); + } + + return __size_res; +} + template _Iterator __pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, From 0ec6d87576729aef14ea7ad06607b083240f9306 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:46:06 +0100 Subject: [PATCH 307/566] Revert "Fix review comment: I would propose to add a parameter _BackendType to this template definition..." This reverts commit 56406dfe64df2f20ce038f78d9032bb952728a07. 
--- include/oneapi/dpl/pstl/algorithm_fwd.h | 3 +-- include/oneapi/dpl/pstl/execution_impl.h | 7 +++---- include/oneapi/dpl/pstl/numeric_fwd.h | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 5a1dcba419c..fd89678dd3b 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -26,10 +26,9 @@ namespace dpl namespace __internal { -template +template struct __parallel_tag; -template struct __parallel_forward_tag; //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index cb2b71225b1..dae807121cf 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -113,22 +113,21 @@ struct __serial_tag using __is_vector = _IsVector; }; -template +template struct __parallel_tag { using __is_vector = _IsVector; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = _BackendType; + using __backend_tag = __par_backend_tag; }; -template struct __parallel_forward_tag { using __is_vector = ::std::false_type; // backend tag can be change depending on // TBB availability in the environment - using __backend_tag = _BackendType; + using __backend_tag = __par_backend_tag; }; template diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index 9ea0ebb1de4..1c835443e78 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -25,7 +25,7 @@ namespace dpl { namespace __internal { -template +template struct __parallel_tag; //------------------------------------------------------------------------ From 4e7bf459892935ffaf116ad2593c6f08ce115744 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:48:11 +0100 Subject: [PATCH 308/566] 
include/oneapi/dpl/pstl/algorithm_impl.h - fix compile error: use of undeclared identifier '__is_vector' --- include/oneapi/dpl/pstl/algorithm_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 2d6569afc2c..acef0bd9cbc 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1521,15 +1521,15 @@ __remove_elements(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Forward else ::new (::std::addressof(*__z)) _Tp(::std::move(*__x)); }, - __is_vector); + _IsVector{}); }, [&__m](_DifferenceType __total) { __m = __total; }); // 3. Elements from result are moved to [first, last) __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, - __result + __m, [__result, __first, __is_vector](_Tp* __i, _Tp* __j) { + __result + __m, [__result, __first](_Tp* __i, _Tp* __j) { __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __result), - __is_vector); + _IsVector{}); }); return __first + __m; }); From d6722694c41f6830b6e9203a3e011f4b81d8214c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:51:38 +0100 Subject: [PATCH 309/566] Fix compile error: no template named '__hetero_tag'; did you mean '__internal::__hetero_tag'? 
--- include/oneapi/dpl/internal/binary_search_impl.h | 12 ++++++------ .../dpl/internal/inclusive_scan_by_segment_impl.h | 8 ++++---- include/oneapi/dpl/internal/reduce_by_segment_impl.h | 6 +++--- include/oneapi/dpl/internal/scan_by_segment_impl.h | 6 +++--- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 0c48edcd468..1881ed344f8 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -114,10 +114,10 @@ binary_search_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 e template OutputIterator -lower_bound_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, +lower_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; + using __backend_tag = typename __internal::__hetero_tag<_BackendTag>::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -146,10 +146,10 @@ lower_bound_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 star template OutputIterator -upper_bound_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, +upper_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; + using __backend_tag = typename __internal::__hetero_tag<_BackendTag>::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -178,10 +178,10 @@ 
upper_bound_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 star template OutputIterator -binary_search_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, +binary_search_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - using __backend_tag = typename __hetero_tag<_BackendTag>::__backend_tag; + using __backend_tag = typename __internal::__hetero_tag<_BackendTag>::__backend_tag; namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); diff --git a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h index 43310cd063b..f211f995b23 100644 --- a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h @@ -43,7 +43,7 @@ pattern_inclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) { - static_assert(__is_backend_tag_v<_Tag>); + static_assert(__internal::__is_backend_tag_v<_Tag>); const auto n = ::std::distance(first1, last1); @@ -78,7 +78,7 @@ pattern_inclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, template OutputIterator -inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, ::std::true_type /* has_known_identity */) { @@ -91,7 +91,7 @@ inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, template OutputIterator 
-inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, ::std::false_type /* has_known_identity */) { @@ -129,7 +129,7 @@ inclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, Input template OutputIterator -pattern_inclusive_scan_by_segment(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, +pattern_inclusive_scan_by_segment(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) { diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 860664bde67..d65f7ee831c 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -193,7 +193,7 @@ using _SegReducePrefixPhase = __seg_reduce_prefix_kernel<_Name...>; template oneapi::dpl::__internal::__difference_t<_Range3> -__sycl_reduce_by_segment(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, +__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, ::std::false_type /* has_known_identity */) { @@ -205,7 +205,7 @@ __sycl_reduce_by_segment(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ template oneapi::dpl::__internal::__difference_t<_Range3> -__sycl_reduce_by_segment(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, 
+__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, ::std::true_type /* has_known_identity */) { @@ -573,7 +573,7 @@ __sycl_reduce_by_segment(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ template ::std::pair -reduce_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +reduce_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, BinaryOperator binary_op) { diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h b/include/oneapi/dpl/internal/scan_by_segment_impl.h index 98cc9e1d93a..7007e875861 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -367,9 +367,9 @@ struct __sycl_scan_by_segment_impl template OutputIterator -__scan_by_segment_impl_common(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, - Operator binary_op, Inclusive) +__scan_by_segment_impl_common(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, Inclusive) { const auto n = ::std::distance(first1, last1); From 9df00f22c791d60f354ff4d7dfba04648200da6b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 15:54:12 +0100 Subject: [PATCH 310/566] Fix compile error: use of undeclared identifier '__is_backend_tag_v' --- include/oneapi/dpl/internal/binary_search_impl.h | 6 +++--- 
.../oneapi/dpl/internal/exclusive_scan_by_segment_impl.h | 8 ++++---- include/oneapi/dpl/internal/reduce_by_segment_impl.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 1881ed344f8..11e47d01417 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -74,7 +74,7 @@ OutputIterator lower_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - static_assert(__is_backend_tag_v<_Tag>); + static_assert(__internal::__is_backend_tag_v<_Tag>); return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { @@ -88,7 +88,7 @@ OutputIterator upper_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - static_assert(__is_backend_tag_v<_Tag>); + static_assert(__internal::__is_backend_tag_v<_Tag>); return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { @@ -102,7 +102,7 @@ OutputIterator binary_search_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - static_assert(__is_backend_tag_v<_Tag>); + static_assert(__internal::__is_backend_tag_v<_Tag>); return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index 07d5b930c2f..24d5db6b630 100644 --- 
a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -44,7 +44,7 @@ pattern_exclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) { - static_assert(__is_backend_tag_v<_Tag>); + static_assert(__internal::__is_backend_tag_v<_Tag>); const auto n = ::std::distance(first1, last1); @@ -97,7 +97,7 @@ pattern_exclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, template OutputIterator -exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, ::std::true_type /* has_known_identity*/) { @@ -108,7 +108,7 @@ exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag> __tag, Policy&& policy, template OutputIterator -exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, ::std::false_type /* has_known_identity*/) { @@ -166,7 +166,7 @@ exclusive_scan_by_segment_impl(__hetero_tag<_BackendTag>, Policy&& policy, Input template OutputIterator -pattern_exclusive_scan_by_segment(__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, +pattern_exclusive_scan_by_segment(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, 
Operator binary_op) { diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index d65f7ee831c..60ec2eb9d42 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -85,7 +85,7 @@ reduce_by_segment_impl(_Tag, Policy&& policy, InputIterator1 first1, InputIterat OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, BinaryOperator binary_op) { - static_assert(__is_backend_tag_v<_Tag>); + static_assert(__internal::__is_backend_tag_v<_Tag>); // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated // keys for the values are equal to the adjacent key. This function's implementation is a derivative work From eec6ebe0352a7a220ebc08fb4a238f464cc15905 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 16:03:00 +0100 Subject: [PATCH 311/566] include/oneapi/dpl/internal/reduce_by_segment_impl.h - fix compile error: use of undeclared identifier '_ExecutionPolicy' --- include/oneapi/dpl/internal/reduce_by_segment_impl.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 60ec2eb9d42..e5ea2180cdb 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -624,9 +624,8 @@ oneapi::dpl::__internal::__enable_if_execution_policy(); + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(); return internal::reduce_by_segment_impl(__dispatch_tag, ::std::forward(policy), first1, last1, first2, result1, result2, binary_pred, binary_op); From dd281bcfbced5201bb98dace8743fd7f28e3a5c4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 16:10:33 +0100 Subject: [PATCH 312/566] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h - fix 
compile error: redefinition of '__parallel_or' --- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 32 +++++-------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index f0a69e9f0c1..9aabb51117c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -187,17 +187,6 @@ __parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __ __s_last, __f); } -template -bool -__parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, - _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __first, __last, __s_first, - __s_last, __f); -} - template bool __parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, @@ -292,19 +281,14 @@ __parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPoli _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { - if constexpr (sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t)) - { - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_histogram( - oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), - __binhash_manager); - } - else - { - static_assert(false, "histogram is not supported on FPGA devices with output types greater than 32 bits"); - return __future(sycl::event{}); - } + 
static_assert(sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t), + "histogram is not supported on FPGA devices with output types greater than 32 bits"); + + // workaround until we implement more performant version for patterns + return oneapi::dpl::__par_backend_hetero::__parallel_histogram( + oneapi::dpl::__internal::__device_backend_tag{}, + __exec.__device_policy(), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), + __binhash_manager); } } // namespace __par_backend_hetero From 228fb95e8c7bbda20ef16d8f3e4ab3c8b05a2153 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 16:24:03 +0100 Subject: [PATCH 313/566] include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h - fix compile error: no matching function for call to '__parallel_find_or' --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index bee1d1f1f69..74189556494 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -103,6 +103,7 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, oneapi::dpl::__par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::zip_view(::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2))); @@ -125,6 +126,7 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) using _TagType = 
oneapi::dpl::__par_backend_hetero::__parallel_find_forward_tag<_Range>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range>(__rng)); @@ -154,6 +156,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2 using _TagType = __par_backend_hetero::__parallel_find_backward_tag<_Range1>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); @@ -177,6 +180,7 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& _ //TODO: To check whether it makes sense to iterate over the second sequence in case of __rng1.size() < __rng2.size() return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); @@ -195,6 +199,7 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval __par_backend_hetero::make_wrapped_policy( 
::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, oneapi::dpl::__par_backend_hetero::__parallel_or_tag{}, ::std::forward<_Range>(__rng)); @@ -232,6 +237,7 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, using _TagType = oneapi::dpl::__par_backend_hetero::__parallel_find_forward_tag<_Range1>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval oneapi::dpl::__par_backend_hetero::make_wrapped_policy (::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); @@ -293,6 +299,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredic // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() auto result = oneapi::dpl::__par_backend_hetero::__parallel_find_or( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, _TagType{}, oneapi::dpl::__ranges::zip_view(__rng1, __rng2)); From 9cd710c30937c9c4883fa3b82dbe13c6e8967a25 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 16:32:36 +0100 Subject: [PATCH 314/566] include/oneapi/dpl/pstl/numeric_impl.h - fix compile error: no matching function for call to '__parallel_transform_scan' --- include/oneapi/dpl/pstl/numeric_impl.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index e5b7180e510..d5dfe6a09bb 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -253,15 +253,17 @@ __pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo template ::std::enable_if_t, 
_OutputIterator> -__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; return __internal::__except_handler([&]() { __par_backend::__parallel_transform_scan( - oneapi::dpl::__internal::__serial_backend_tag{}, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __last - __first, [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, __binary_op, From f3593bbcb6f0467e2a2f56dd8e3cb491f8c11336 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 16:42:09 +0100 Subject: [PATCH 315/566] include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h - fix compile error: no matching function for call to '__pattern_merge' --- .../oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 47432beca9d..415f0d567c5 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1230,7 +1230,7 @@ __pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite template void -__pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, +__pattern_inplace_merge(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __middle, _Iterator __last, _Compare __comp) { using _ValueType = typename 
::std::iterator_traits<_Iterator>::value_type; @@ -1245,16 +1245,20 @@ __pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex auto __copy_first = __buf.get(); auto __copy_last = __copy_first + __n; - __pattern_merge(__exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __pattern_merge(oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__middle), + decltype(__last), decltype(__copy_first)>(), + __exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), - __comp, ::std::true_type{}, ::std::true_type{}); + __comp); //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer __pattern_walk2( - __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), + decltype(__first)>(), + __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_move<_ExecutionPolicy>{}); } From fff76e82870f9d4b1b0a25cb9d8dddcea9b9c842 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 16:49:56 +0100 Subject: [PATCH 316/566] Fix compile error: no matching function for call to '__parallel_transform_reduce' --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 5 +++++ .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 1 + include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h | 2 ++ 3 files changed, 8 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 415f0d567c5..659fcc99155 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -488,6 +488,7 @@ __pattern_min_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec auto __buf = __keep(__first, __last); auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, _Commutative>( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) @@ -556,6 +557,7 @@ __pattern_minmax_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e auto __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) 
@@ -650,6 +652,7 @@ __pattern_count(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::true_type /*is_commutative*/>( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) @@ -1116,6 +1119,7 @@ __pattern_is_partitioned(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) @@ -1417,6 +1421,7 @@ __pattern_lexicographical_compare(__hetero_tag<_BackendTag> __tag, _ExecutionPol auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf1.all_view(), __buf2.all_view()) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 9aabb51117c..77fa3388f47 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -101,6 +101,7 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__fpga_backend_tag, _Execut { // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_Tp, _Commutative>( + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } diff --git 
a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index cabcc10eecd..84a49029182 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -60,6 +60,7 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()) @@ -88,6 +89,7 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()) From 8d70c650006b6ce4237bfe59d40b8131bfbacb4d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 17:01:14 +0100 Subject: [PATCH 317/566] Fix compile error: no matching function for call to '__parallel_transform_scan_base' --- include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 1 + include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 74189556494..258dd76e2d6 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -363,6 +363,7 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng auto __res = __par_backend_hetero::__parallel_transform_scan_base( + 
oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::zip_view( __rng1, oneapi::dpl::__ranges::all_view( diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 0b104a5ff4b..3565ac22473 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -106,6 +106,7 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Rang _NoOpFunctor __get_data_op; oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __binary_op, __init, // local scan From 40048ea32a4ca2dfe440c6958fd24f2cad0a21c3 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 17:15:49 +0100 Subject: [PATCH 318/566] include/oneapi/dpl/internal/async_impl/glue_async_impl.h - fix compile error: no matching function for call to '__pattern_transform_scan_async' --- include/oneapi/dpl/internal/async_impl/glue_async_impl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index d89da19493e..d9f06601b45 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -220,9 +220,13 @@ auto inclusive_scan_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt1 __last1, _ForwardIt2 __first2, _Events&&... 
__dependencies) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIt1, _ForwardIt2>(); + using _ValueType = typename ::std::iterator_traits<_ForwardIt1>::value_type; wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), ::std::plus<_ValueType>(), /*inclusive=*/::std::true_type()); } From 82b7531b5e5ca8be8c8cefeded8e43dc4e877f55 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 17:21:00 +0100 Subject: [PATCH 319/566] include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h - fix compile error: no matching function for call to '__parallel_for' --- .../dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 258dd76e2d6..e10f8c0a8f5 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -46,9 +46,10 @@ __pattern_walk_n(_ExecutionPolicy&& __exec, _Function __f, _Ranges&&... __rngs) auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); if (__n > 0) { - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - ::std::forward<_Ranges>(__rngs)...) + oneapi::dpl::__par_backend_hetero::__parallel_for( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval + ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, + ::std::forward<_Ranges>(__rngs)...) 
.wait(); } } @@ -753,6 +754,7 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 //reduce by segment oneapi::dpl::__par_backend_hetero::__parallel_for( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce1_wrapper>(__exec), unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__n)>(__binary_op, __n), __intermediate_result_end, oneapi::dpl::__ranges::take_view_simple(experimental::ranges::views::all_read(__idx), @@ -794,6 +796,7 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 //reduce by segment oneapi::dpl::__par_backend_hetero::__parallel_for( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce2_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__intermediate_result_end)>( From 6fc106d0dc8491cb2ace052d43efe59a7a18c24e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Fri, 16 Feb 2024 17:24:56 +0100 Subject: [PATCH 320/566] include/oneapi/dpl/pstl/algorithm_impl.h - fix compile error: no matching function for call to '__remove_elements' --- include/oneapi/dpl/pstl/algorithm_impl.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index acef0bd9cbc..3f6ae184678 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1558,8 +1558,7 @@ __pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ra __internal::__brick_walk3( __b, __e, __it - 1, __it, [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, _IsVector{}); - }, - _IsVector{}); + }); } 
//------------------------------------------------------------------------ @@ -2991,8 +2990,7 @@ __pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { __internal::__brick_walk2( __b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, _IsVector{}); - }, - _IsVector{}); + }); } //------------------------------------------------------------------------ From 2e94dfa3f7d4ef28ce3b64d52db21730a844c30a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:24:40 +0100 Subject: [PATCH 321/566] include/oneapi/dpl/pstl/algorithm_impl.h - fix compile error: no matching function for call to '__parallel_transform_reduce' --- include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 3 +++ include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h | 1 + 2 files changed, 4 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index e10f8c0a8f5..9c3882063f1 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -328,6 +328,7 @@ __pattern_count(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicat return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::true_type /*is_commutative*/>( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) @@ -571,6 +572,7 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( + oneapi::dpl::__internal::__device_backend_tag{}, 
// TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) @@ -627,6 +629,7 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __c _ReduceValueType __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 3565ac22473..40f1f617eca 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -74,6 +74,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value ::std::forward<_Range>(__rng)) From 214ba0f9ae19ae9f25048e4d84319d9dcdef75e2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:49:27 +0100 Subject: [PATCH 322/566] struct __parallel_transform_reduce_small_submitter::operator() + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 
28b48333d52..0dd1a79a186 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -106,11 +106,10 @@ struct __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite __internal::__optional_kernel_name<_Name...>> { template = 0, typename... _Ranges> auto - operator()(_ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, const _Size __n, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) const { auto __transform_pattern = unseq_backend::transform_reduce<_ExecutionPolicy, __iters_per_work_item, _ReduceOp, _TransformOp, From a67e6ec5dbf1b7d24180634b171539a79f6c3c10 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:50:06 +0100 Subject: [PATCH 323/566] struct __parallel_transform_reduce_small_submitter::operator() + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 0dd1a79a186..d1761d266d9 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -148,9 +148,9 @@ __parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend __reduce_small_kernel<::std::integral_constant<::std::uint8_t, __iters_per_work_item>, _CustomName>>; return __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __iters_per_work_item, _Commutative, - _ReduceKernel>()(::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, - 
::std::forward<_Ranges>(__rngs)...); + _ReduceKernel>()( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, + __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } // Submits the first kernel of the parallel_transform_reduce for mid-sized arrays. From 262f54ffd31dfdb98079f4c2f1536e7165bc508a Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:50:30 +0100 Subject: [PATCH 324/566] struct __parallel_transform_reduce_device_kernel_submitter::operator() + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index d1761d266d9..b30cb6ee6f3 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -166,11 +166,11 @@ struct __parallel_transform_reduce_device_kernel_submitter<_Tp, __work_group_siz __internal::__optional_kernel_name<_KernelName...>> { template = 0, typename... _Ranges> auto - operator()(_ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, sycl::buffer<_Tp>& __temp, _Ranges&&... __rngs) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Size __n, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, sycl::buffer<_Tp>& __temp, + _Ranges&&... 
__rngs) const { auto __transform_pattern = unseq_backend::transform_reduce<_ExecutionPolicy, __iters_per_work_item, _ReduceOp, _TransformOp, From b5c682d39117668f27e3f66780e72fd56d9d0fd4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:53:12 +0100 Subject: [PATCH 325/566] struct __parallel_transform_reduce_device_kernel_submitter::operator() + tag calls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index b30cb6ee6f3..c0785583975 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -279,7 +279,8 @@ __parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_t sycl::event __reduce_event = __parallel_transform_reduce_device_kernel_submitter<_Tp, __work_group_size, __iters_per_work_item_device_kernel, _Commutative, _ReduceDeviceKernel>()( - __exec, __n, __reduce_op, __transform_op, __init, __temp, ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, __exec, __n, __reduce_op, __transform_op, __init, __temp, + ::std::forward<_Ranges>(__rngs)...); __n = __n_groups; // Number of preliminary results from the device kernel. 
return __parallel_transform_reduce_work_group_kernel_submitter< From fcf5105b192d0983f174b8c7fe910aae53e71be0 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:53:37 +0100 Subject: [PATCH 326/566] struct __parallel_transform_reduce_work_group_kernel_submitter::operator() + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index c0785583975..eac42441558 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -207,11 +207,11 @@ template > { - template = 0> + template auto - operator()(_ExecutionPolicy&& __exec, sycl::event& __reduce_event, _Size __n, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, sycl::buffer<_Tp>& __temp) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, sycl::event& __reduce_event, + _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, + sycl::buffer<_Tp>& __temp) const { using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; auto __transform_pattern = From 0fce2a16216a3084fcd969c9b839f60be9fa98e2 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:54:06 +0100 Subject: [PATCH 327/566] struct__parallel_transform_reduce_impl::submit + tag impls --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index eac42441558..04c201494f1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -285,7 +285,8 @@ __parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_t __n = __n_groups; // Number of preliminary results from the device kernel. return __parallel_transform_reduce_work_group_kernel_submitter< _Tp, __work_group_size, __iters_per_work_item_work_group_kernel, _Commutative, _ReduceWorkGroupKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, __reduce_op, __transform_op, __init, __temp); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, + __reduce_op, __transform_op, __init, __temp); } // General implementation using a tree reduction @@ -293,11 +294,11 @@ template = 0, typename... _Ranges> static auto - submit(_ExecutionPolicy&& __exec, _Size __n, ::std::uint16_t __work_group_size, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) + submit(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Size __n, + ::std::uint16_t __work_group_size, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, + _Ranges&&... 
__rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; From 1e2ed7d87e4a06fb124270b6d9aff2b677f8c2dd Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 09:55:02 +0100 Subject: [PATCH 328/566] struct__parallel_transform_reduce_impl::submit + tag calls --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 04c201494f1..c6d164f9415 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -520,6 +520,7 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _Exec } // Otherwise use a recursive tree reduction. return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } From 88a3e14636aa87561fac77731735ae641172070e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 10:03:57 +0100 Subject: [PATCH 329/566] include/oneapi/dpl/pstl/algorithm_impl.h - fix compile error: no matching function for call to '__parallel_merge' --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 9c3882063f1..bf4087fcea3 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -517,9 +517,10 @@ 
__pattern_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ } else { - __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp) + __par_backend_hetero::__parallel_merge( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + ::std::forward<_Range3>(__rng3), __comp) .wait(); } From 666acb213c9a0f99aa60550403591890348d5332 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 10:14:29 +0100 Subject: [PATCH 330/566] include/oneapi/dpl/pstl/algorithm_impl.h - fix compile error: no matching function for call to '__parallel_stable_sort' --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index bf4087fcea3..24a5d428a77 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -536,8 +536,9 @@ oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> __pattern_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) { if (__rng.size() >= 2) - __par_backend_hetero::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range>(__rng), __comp, __proj) + __par_backend_hetero::__parallel_stable_sort( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __comp, __proj) .wait(); } From a8c0a10e2895698761227329ccc3005434c9c87f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 
Feb 2024 10:22:02 +0100 Subject: [PATCH 331/566] include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h - fix compile error: no matching function for call to '__parallel_transform_reduce' --- include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 40f1f617eca..42502a4b7b5 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -51,6 +51,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2& return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( + oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)) From d1b498de48a781e068cf149269d013cd91aa99ce Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 10:35:05 +0100 Subject: [PATCH 332/566] include/oneapi/dpl/pstl/algorithm_fwd.h - fix compile error: no member named '__pattern_shift_right' in namespace 'oneapi::dpl::__internal' --- include/oneapi/dpl/pstl/algorithm_fwd.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index fd89678dd3b..7eba977aba1 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -1265,6 +1265,15 @@ _ForwardIterator __pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type); 
+//------------------------------------------------------------------------ +// shift_right +//------------------------------------------------------------------------ + +template +_BidirectionalIterator +__pattern_shift_right(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + typename ::std::iterator_traits<_BidirectionalIterator>::difference_type); + } // namespace __internal } // namespace dpl } // namespace oneapi From 5f34eda9ec56354effcd6906fd0a02f50f3d783f Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 10:49:46 +0100 Subject: [PATCH 333/566] async patterns tag impls and calls --- .../internal/async_impl/async_impl_hetero.h | 84 ++++++++----------- .../dpl/internal/async_impl/glue_async_impl.h | 47 ++++++++--- 2 files changed, 67 insertions(+), 64 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index dc08e7ba2d1..eaa92958c27 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -30,10 +30,10 @@ namespace dpl namespace __internal { -template = 0> +template auto -__pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) +__pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { auto __n = __last - __first; assert(__n > 0); @@ -42,23 +42,19 @@ __pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forw oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); auto __buf = __keep(__first, __last); - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - auto __future_obj = 
oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()); return __future_obj; } -template = 0> + typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, typename _Function> auto -__pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f) +__pattern_walk2_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { auto __n = __last1 - __first1; assert(__n > 0); @@ -69,12 +65,8 @@ __pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); auto __buf2 = __keep2(__first2, __first2 + __n); - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync::value) @@ -83,11 +75,11 @@ __pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo return __future.__make_future(__first2 + __n); } -template = 0> +template auto -__pattern_walk3_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) +__pattern_walk3_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 
__first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { auto __n = __last1 - __first1; assert(__n > 0); @@ -102,25 +94,22 @@ __pattern_walk3_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, - _ForwardIterator2, _ForwardIterator3>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - auto __future = - oneapi::dpl::__par_backend_hetero::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view(), __buf3.all_view()); return __future.__make_future(__first3 + __n); } -template = 0> +template auto -__pattern_walk2_brick_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick) +__pattern_walk2_brick_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { return __pattern_walk2_async( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first1, __last1, __first2, __brick); } @@ -129,11 +118,10 @@ __pattern_walk2_brick_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, 
+__pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { @@ -151,13 +139,9 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterato oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator2>(); auto __buf2 = __keep2(__first2, __first2 + __n); - constexpr auto __dispatch_tag = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()); } @@ -166,12 +150,12 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterato // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) +__pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) { assert(__first < __last); @@ -182,22 +166,20 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __f auto __keep = 
oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator>(); auto __buf = __keep(__first, __last); - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()); } -template = 0> +template auto -__pattern_fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value) +__pattern_fill_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, const _T& __value) { return __pattern_walk1_async( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index d9f06601b45..30a37fe229e 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -43,9 +43,12 @@ auto transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op, _Events&&... 
__dependencies) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk2_async( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); return ret_val; } @@ -59,9 +62,12 @@ transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI _ForwardIterator2 __first2, _ForwardIterator __result, _BinaryOperation __op, _Events&&... __dependencies) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk3_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); return ret_val; } @@ -73,9 +79,12 @@ auto copy_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _Events&&... 
__dependencies) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk2_brick_async( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); return ret_val; } @@ -118,9 +127,11 @@ auto for_each_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, _Events&&... __dependencies) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); - auto ret_val = - oneapi::dpl::__internal::__pattern_walk1_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); + auto ret_val = oneapi::dpl::__internal::__pattern_walk1_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); return ret_val; } @@ -133,10 +144,12 @@ auto reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, _Events&&... 
__dependencies) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); - auto ret_val = oneapi::dpl::__internal::__pattern_transform_reduce_async(::std::forward<_ExecutionPolicy>(__exec), - __first, __last, __init, __binary_op, - oneapi::dpl::__internal::__no_op()); + auto ret_val = oneapi::dpl::__internal::__pattern_transform_reduce_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, + oneapi::dpl::__internal::__no_op()); return ret_val; } @@ -168,9 +181,11 @@ auto fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Events&&... __dependencies) { + constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_fill_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __value); + return oneapi::dpl::__internal::__pattern_fill_async(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __value); } // [async.transform_reduce] @@ -183,9 +198,13 @@ auto transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt1 __last1, _ForwardIt2 __first2, _T __init, _BinaryOp1 __binary_op1, _BinaryOp2 __binary_op2, _Events&&... 
__dependencies) { + constexpr auto __dispatch_tag = + oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIt1, _ForwardIt2>(); + wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_reduce_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, + __binary_op2); } template (); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_reduce_async(::std::forward<_ExecutionPolicy>(__exec), __first, - __last, __init, __binary_op, __unary_op); + return oneapi::dpl::__internal::__pattern_transform_reduce_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op); } template Date: Mon, 19 Feb 2024 11:05:44 +0100 Subject: [PATCH 334/566] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h - remove fpga patterns which simple calls oneapi::dpl::__internal::__device_backend_tag impls --- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 182 ------------------ 1 file changed, 182 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 77fa3388f47..6c88c276a66 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -89,188 +89,6 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& _ __count, std::forward<_Ranges>(__rngs)...); } -//------------------------------------------------------------------------ -// parallel_transform_reduce -//------------------------------------------------------------------------ - -template -auto 
-__parallel_transform_reduce(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, - _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_Tp, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); -} - -//------------------------------------------------------------------------ -// parallel_transform_scan -//------------------------------------------------------------------------ - -template -auto -__parallel_transform_scan(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __in_rng, - _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__in_rng), - ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); -} - -template -auto -__parallel_transform_scan_base(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, - _Range2&& __rng2, _BinaryOperation __binary_op, _InitType __init, - _LocalScan __local_scan, _GroupScan __group_scan, _GlobalScan __global_scan) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), __binary_op, __init, __local_scan, __group_scan, __global_scan); -} - -template -auto 
-__parallel_copy_if(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, - _OutRng&& __out_rng, _Size __n, _Pred __pred) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_copy_if( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, __pred); -} - -template -auto -__parallel_scan_copy(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, - _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_scan_copy( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, __create_mask_op, __copy_by_mask_op); -} - -//------------------------------------------------------------------------ -// __parallel_find_or -//----------------------------------------------------------------------- - -template -::std::conditional_t< - ::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t::type>> -__parallel_find_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Brick __f, - _BrickTag __brick_tag, _Ranges&&... 
__rngs) -{ - return oneapi::dpl::__par_backend_hetero::__parallel_find_or(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __f, __brick_tag, - ::std::forward<_Ranges>(__rngs)...); -} - -//------------------------------------------------------------------------ -// parallel_or -//----------------------------------------------------------------------- -template -bool -__parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, - _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __first, __last, __s_first, - __s_last, __f); -} - -template -bool -__parallel_or(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, - _Iterator __last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __first, __last, __f); -} - -//------------------------------------------------------------------------ -// parallel_find -//----------------------------------------------------------------------- - -template -_Iterator1 -__parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, - _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __first, __last, __s_first, - __s_last, __f, __is_first); -} - -template -_Iterator -__parallel_find(oneapi::dpl::__internal::__fpga_backend_tag, 
_ExecutionPolicy&& __exec, _Iterator __first, - _Iterator __last, _Brick __f, _IsFirst __is_first) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __first, __last, __f, __is_first); -} - -//------------------------------------------------------------------------ -// parallel_merge -//----------------------------------------------------------------------- - -template -auto -__parallel_merge(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, - _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) - -> decltype(oneapi::dpl::__par_backend_hetero::__parallel_merge( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp)) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_merge( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp); -} - -//------------------------------------------------------------------------ -// parallel_stable_sort -//----------------------------------------------------------------------- - -template -auto -__parallel_stable_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, - _Compare __comp, _Proj __proj) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), - ::std::forward<_Range>(__rng), __comp, __proj); -} - -//------------------------------------------------------------------------ -// 
parallel_partial_sort -//----------------------------------------------------------------------- - -// TODO: check if it makes sense to move these wrappers out of backend to a common place -template -auto -__parallel_partial_sort(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, - _Iterator __mid, _Iterator __last, _Compare __comp) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_partial_sort( - oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __first, __mid, __last, __comp); -} - //------------------------------------------------------------------------ // parallel_histogram //----------------------------------------------------------------------- From a006b97a4ef3862593731f6f2cd2adae2a3438e4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 11:23:55 +0100 Subject: [PATCH 335/566] Apply GitHUB clang format --- .../internal/async_impl/async_impl_hetero.h | 8 +-- .../dpl/internal/async_impl/glue_async_impl.h | 5 +- .../internal/exclusive_scan_by_segment_impl.h | 14 ++-- .../internal/inclusive_scan_by_segment_impl.h | 14 ++-- .../dpl/internal/reduce_by_segment_impl.h | 20 +++--- include/oneapi/dpl/pstl/algorithm_impl.h | 11 ++-- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 25 +++---- .../hetero/algorithm_ranges_impl_hetero.h | 4 +- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 12 ++-- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 8 +-- .../dpcpp/parallel_backend_sycl_histogram.h | 16 ++--- .../dpcpp/parallel_backend_sycl_reduce.h | 65 ++++++++----------- .../dpl/pstl/hetero/numeric_impl_hetero.h | 6 +- include/oneapi/dpl/pstl/numeric_impl.h | 8 +-- 14 files changed, 95 insertions(+), 121 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index eaa92958c27..02ee5f852f1 100644 --- 
a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -179,8 +179,7 @@ __pattern_fill_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __last, const _T& __value) { return __pattern_walk1_async( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), + __tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), fill_functor<_T>{__value}); @@ -206,9 +205,8 @@ __pattern_transform_scan_base_async(__hetero_tag<_BackendTag>, _ExecutionPolicy& auto __buf2 = __keep2(__result, __result + __n); auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, __init, - __binary_op, _Inclusive{}); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, + __init, __binary_op, _Inclusive{}); return __res.__make_future(__result + __n); } diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index 30a37fe229e..586360f83af 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -247,9 +247,8 @@ inclusive_scan_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt using _ValueType = typename ::std::iterator_traits<_ForwardIt1>::value_type; wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - __dispatch_tag, - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - ::std::plus<_ValueType>(), /*inclusive=*/::std::true_type()); + __dispatch_tag, 
::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), ::std::plus<_ValueType>(), /*inclusive=*/::std::true_type()); } template OutputIterator -exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, - Operator binary_op, ::std::true_type /* has_known_identity*/) +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, + ::std::true_type /* has_known_identity*/) { return internal::__scan_by_segment_impl_common(__tag, ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, ::std::false_type{}); @@ -108,9 +109,10 @@ exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Poli template OutputIterator -exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, - Operator binary_op, ::std::false_type /* has_known_identity*/) +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, + ::std::false_type /* has_known_identity*/) { const auto n = ::std::distance(first1, last1); diff --git a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h index f211f995b23..38b377bf601 100644 --- a/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/inclusive_scan_by_segment_impl.h @@ -78,9 
+78,10 @@ pattern_inclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, template OutputIterator -inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, - BinaryOperator binary_op, ::std::true_type /* has_known_identity */) +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op, + ::std::true_type /* has_known_identity */) { using iter_value_t = typename ::std::iterator_traits::value_type; iter_value_t identity = unseq_backend::__known_identity; @@ -91,9 +92,10 @@ inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Poli template OutputIterator -inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, - BinaryOperator binary_op, ::std::false_type /* has_known_identity */) +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op, + ::std::false_type /* has_known_identity */) { typedef unsigned int FlagType; diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index e5ea2180cdb..546237b2ae8 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -193,9 +193,10 @@ using _SegReducePrefixPhase = __seg_reduce_prefix_kernel<_Name...>; template oneapi::dpl::__internal::__difference_t<_Range3> 
-__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, - _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, - _BinaryOperator __binary_op, ::std::false_type /* has_known_identity */) +__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, + ::std::false_type /* has_known_identity */) { return oneapi::dpl::experimental::ranges::reduce_by_segment( ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__keys), ::std::forward<_Range2>(__values), @@ -205,9 +206,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy template oneapi::dpl::__internal::__difference_t<_Range3> -__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, - _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, - _BinaryOperator __binary_op, ::std::true_type /* has_known_identity */) +__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, + ::std::true_type /* has_known_identity */) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -573,9 +575,9 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy template ::std::pair -reduce_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, - BinaryOperator binary_op) 
+reduce_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, + BinaryPred binary_pred, BinaryOperator binary_op) { // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated // keys for the values are equal to the adjacent key. diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 3f6ae184678..87415af4904 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1470,7 +1470,7 @@ __remove_elements(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Forward _DifferenceType __min = __par_backend::__parallel_reduce( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, [__first, __mask, &__calc_mask](_DifferenceType __i, _DifferenceType __j, - _DifferenceType __local_min) -> _DifferenceType { + _DifferenceType __local_min) -> _DifferenceType { // Create mask __calc_mask(__mask + __i, __mask + __j, __first + __i); @@ -1552,8 +1552,7 @@ __pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Ra return __internal::__brick_unique(__first, __last, __pred, _IsVector{}); } return __internal::__remove_elements( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { __internal::__brick_walk3( __b, __e, __it - 1, __it, @@ -2985,8 +2984,7 @@ __pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, } return __internal::__remove_elements( - __tag, - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { __internal::__brick_walk2( __b, __e, __it, 
[&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, _IsVector{}); @@ -4270,7 +4268,8 @@ __pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Forw template _BidirectionalIterator -__pattern_shift_right(_Tag __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, +__pattern_shift_right(_Tag __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __last, typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) { static_assert(__is_backend_tag_v<_Tag>); diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 659fcc99155..ea13b6e53cf 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -488,8 +488,7 @@ __pattern_min_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec auto __buf = __keep(__first, __last); auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, _Commutative>( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -557,8 +556,7 @@ __pattern_minmax_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e auto __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -652,8 +650,7 @@ __pattern_count(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite return 
oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::true_type /*is_commutative*/>( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -1119,8 +1116,7 @@ __pattern_is_partitioned(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -1234,8 +1230,8 @@ __pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite template void -__pattern_inplace_merge(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, - _Iterator __middle, _Iterator __last, _Compare __comp) +__pattern_inplace_merge(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __middle, + _Iterator __last, _Compare __comp) { using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; @@ -1421,8 +1417,7 @@ __pattern_lexicographical_compare(__hetero_tag<_BackendTag> __tag, _ExecutionPol auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf1.all_view(), __buf2.all_view()) .get(); @@ -1775,8 +1770,7 @@ __pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _F 
auto __result_size = __par_backend_hetero::__parallel_transform_scan_base( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( __buf1.all_view(), __buf2.all_view(), oneapi::dpl::__ranges::all_view( @@ -2087,7 +2081,8 @@ __pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec //A shift right is the shift left with a reverse logic. auto __rng = oneapi::dpl::__ranges::reverse_view_simple{__buf.all_view()}; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), __rng, __n); + auto __res = + oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), __rng, __n); return __last - __res; } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 24a5d428a77..f227fd29383 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -239,8 +239,8 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, return oneapi::dpl::__par_backend_hetero::__parallel_find_or( oneapi::dpl::__internal::__device_backend_tag{}, // TODO required to fix backend tag eval - oneapi::dpl::__par_backend_hetero::make_wrapped_policy - (::std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__par_backend_hetero::make_wrapped_policy< + oneapi::dpl::__par_backend_hetero::__find_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index aba0a6ada17..ab31b05dc72 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -817,9 +817,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _Execut return __future( __parallel_transform_scan_base( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), - ::std::forward<_Range2>(__out_rng), __binary_op, __init, + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __binary_op, __init, // local scan unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, @@ -895,8 +894,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPo oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); return __parallel_transform_scan_base( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( ::std::forward<_InRng>(__in_rng), oneapi::dpl::__ranges::all_view( @@ -1814,8 +1812,8 @@ __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _Execution _Compare, _Proj __proj) { return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range>(__rng), __proj); } #endif diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 6c88c276a66..08ccc58287c 
100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -97,17 +97,15 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& _ template auto __parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, const _Event& __init_event, - _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) + _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { static_assert(sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t), "histogram is not supported on FPGA devices with output types greater than 32 bits"); // workaround until we implement more performant version for patterns return oneapi::dpl::__par_backend_hetero::__parallel_histogram( - oneapi::dpl::__internal::__device_backend_tag{}, - __exec.__device_policy(), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), - __binhash_manager); + oneapi::dpl::__internal::__device_backend_tag{}, __exec.__device_policy(), __init_event, + ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } } // namespace __par_backend_hetero diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index 8fbe0ec5af6..dd2de5ebc7d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -515,9 +515,9 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag { return __future( __histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, - ::std::forward<_Range1>(__input), 
::std::forward<_Range2>(__bins), __binhash_manager)); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), + __binhash_manager)); } // if bins fit into SLM, use local atomics else if (__num_bins * sizeof(_local_histogram_type) + @@ -551,16 +551,14 @@ __parallel_histogram(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPo if (__input.size() < 1048576) // 2^20 { return __parallel_histogram_select_kernel( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), - ::std::forward<_Range2>(__bins), __binhash_manager); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } else { return __parallel_histogram_select_kernel( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), - ::std::forward<_Range2>(__bins), __binhash_manager); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index c6d164f9415..726c1743301 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -432,44 +432,38 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _Exec if (__n <= 256) { return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - 
::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 512) { return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 1024) { return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 2048) { return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4096) { return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8192) { return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } // Use two-step tree reduction. @@ -478,51 +472,44 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _Exec else if (__n <= 2097152) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4194304) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8388608) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + 
__reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 16777216) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 33554432) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 67108864) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } } // Otherwise use a recursive tree reduction. 
return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( - oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __work_group_size, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } } // namespace __par_backend_hetero diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 84a49029182..2955815d29d 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -60,8 +60,7 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()) .get(); @@ -89,8 +88,7 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - _BackendTag{}, - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()) .get(); diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index d5dfe6a09bb..5e7f4dcbc0d 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ 
b/include/oneapi/dpl/pstl/numeric_impl.h @@ -263,8 +263,7 @@ __pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ return __internal::__except_handler([&]() { __par_backend::__parallel_transform_scan( - __backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __last - __first, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __last - __first, [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, __binary_op, [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { @@ -388,9 +387,8 @@ __pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first template _RandomAccessIterator2 -__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, - _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _BinaryOperation __op) +__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, _BinaryOperation __op) { using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; From fd191e25e3d0642154cdc157af9e2f49d230f75d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 11:47:54 +0100 Subject: [PATCH 336/566] Fix self review comment: using oneapi::dpl::__internal::__device_backend_tag instance from function param instead of creating the new one --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 60 +++++++++++-------- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 3 +- .../dpcpp/parallel_backend_sycl_histogram.h | 15 ++--- .../dpcpp/parallel_backend_sycl_reduce.h | 40 +++++++------ 4 files changed, 65 insertions(+), 53 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index ab31b05dc72..0459126fbed 100644 --- 
a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -265,15 +265,14 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>> //for some algorithms happens that size of processing range is n, but amount of iterations is n/2. template auto -__parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, - _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, + _Index __count, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<_CustomName>; - return __parallel_for_submitter<_ForKernel>()(oneapi::dpl::__internal::__device_backend_tag{}, - ::std::forward<_ExecutionPolicy>(__exec), __brick, __count, - ::std::forward<_Ranges>(__rngs)...); + return __parallel_for_submitter<_ForKernel>()(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __brick, + __count, ::std::forward<_Ranges>(__rngs)...); } //------------------------------------------------------------------------ @@ -775,7 +774,8 @@ __parallel_transform_scan_base(oneapi::dpl::__internal::__device_backend_tag, _E template auto -__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __in_rng, +__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) { @@ -799,7 +799,7 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _Execut if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size) { return __parallel_transform_scan_single_group( - 
oneapi::dpl::__internal::__device_backend_tag{}, std::forward<_ExecutionPolicy>(__exec), + __backend_tag, std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); } @@ -817,7 +817,7 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag, _Execut return __future( __parallel_transform_scan_base( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __binary_op, __init, // local scan unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, @@ -875,7 +875,8 @@ struct __invoke_single_group_copy_if template auto -__parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, +__parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { using _ReduceOp = ::std::plus<_Size>; @@ -894,7 +895,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPo oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); return __parallel_transform_scan_base( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( ::std::forward<_InRng>(__in_rng), oneapi::dpl::__ranges::all_view( @@ -914,7 +915,8 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPo template auto -__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, 
+__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) { using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; @@ -950,7 +952,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPoli using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 1>; - return __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag{}, + return __parallel_scan_copy(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, CreateOp{__pred}, CopyOp{}); } @@ -1203,7 +1205,8 @@ class __or_policy_wrapper template bool -__parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, +__parallel_or(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); @@ -1212,7 +1215,7 @@ __parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& auto __s_buf = __s_keep(__s_first, __s_last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( - oneapi::dpl::__internal::__device_backend_tag{}, + __backend_tag, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); } @@ -1222,14 +1225,14 @@ __parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& // backend code. 
template bool -__parallel_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, +__parallel_or(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( - oneapi::dpl::__internal::__device_backend_tag{}, + __backend_tag, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view()); } @@ -1245,7 +1248,8 @@ class __find_policy_wrapper template _Iterator1 -__parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator1 __first, +__parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); @@ -1256,7 +1260,7 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy& using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( - oneapi::dpl::__internal::__device_backend_tag{}, + __backend_tag, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); @@ -1267,7 +1271,8 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy& // backend code. 
template _Iterator -__parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, +__parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); @@ -1277,7 +1282,7 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy& typename ::std::conditional<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>::type; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( - oneapi::dpl::__internal::__device_backend_tag{}, + __backend_tag, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view()); @@ -1808,11 +1813,12 @@ template < ::std::enable_if_t< __is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto -__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range&& __rng, _Compare, _Proj __proj) { return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); } #endif @@ -1822,13 +1828,14 @@ template < ::std::enable_if_t< !__is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto -__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range&& __rng, _Compare __comp, _Proj __proj) { auto 
__cmp_f = [__comp, __proj](const auto& __a, const auto& __b) mutable { return __comp(__proj(__a), __proj(__b)); }; - return __parallel_sort_impl(oneapi::dpl::__internal::__device_backend_tag{}, + return __parallel_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); } @@ -1841,7 +1848,8 @@ __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag, _Execution // __full_merge_kernel in order to use __parallel_sort_impl routine template auto -__parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, +__parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp) { const auto __mid_idx = __mid - __first; @@ -1849,7 +1857,7 @@ __parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag, _Executio auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag{}, + return __parallel_partial_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __partial_merge_kernel{__mid_idx}, __comp); } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 08ccc58287c..d3970e5f5a5 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -96,7 +96,8 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& _ // TODO: check if it makes sense to move these wrappers out of backend to a common place template auto -__parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, const _Event& 
__init_event, +__parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, + const _Event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { static_assert(sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t), diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index dd2de5ebc7d..54d6e54bb8e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -495,7 +495,8 @@ __histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_bac template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, +__parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { @@ -515,7 +516,7 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag { return __future( __histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } @@ -525,7 +526,7 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __local_mem_size) { return __future(__histogram_general_local_atomics<__iters_per_work_item>( - oneapi::dpl::__internal::__device_backend_tag{}, 
::std::forward<_ExecutionPolicy>(__exec), __init_event, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } else // otherwise, use global atomics (private copies per workgroup) @@ -536,7 +537,7 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag // private copies of the histogram bins in global memory. No unrolling is taken advantage of here because it // is a runtime argument. return __future(__histogram_general_private_global_atomics( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } @@ -544,20 +545,20 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag template auto -__parallel_histogram(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, +__parallel_histogram(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { if (__input.size() < 1048576) // 2^20 { return __parallel_histogram_select_kernel( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } else { return __parallel_histogram_select_kernel( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), 
__binhash_manager); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 726c1743301..100369680bf 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -139,7 +139,8 @@ template auto -__parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, +__parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { @@ -149,7 +150,7 @@ __parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend return __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __iters_per_work_item, _Commutative, _ReduceKernel>()( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } @@ -257,7 +258,8 @@ template auto -__parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, +__parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... 
__rngs) { @@ -279,13 +281,13 @@ __parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_t sycl::event __reduce_event = __parallel_transform_reduce_device_kernel_submitter<_Tp, __work_group_size, __iters_per_work_item_device_kernel, _Commutative, _ReduceDeviceKernel>()( - oneapi::dpl::__internal::__device_backend_tag{}, __exec, __n, __reduce_op, __transform_op, __init, __temp, + __backend_tag, __exec, __n, __reduce_op, __transform_op, __init, __temp, ::std::forward<_Ranges>(__rngs)...); __n = __n_groups; // Number of preliminary results from the device kernel. return __parallel_transform_reduce_work_group_kernel_submitter< _Tp, __work_group_size, __iters_per_work_item_work_group_kernel, _Commutative, _ReduceWorkGroupKernel>()( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, __reduce_op, __transform_op, __init, __temp); } @@ -415,7 +417,7 @@ struct __parallel_transform_reduce_impl template auto -__parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, +__parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... 
__rngs) { auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); @@ -432,37 +434,37 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _Exec if (__n <= 256) { return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 512) { return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 1024) { return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 2048) { return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4096) { return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8192) { return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( - 
oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } @@ -472,43 +474,43 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag, _Exec else if (__n <= 2097152) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4194304) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8388608) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 16777216) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 33554432) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, 
__init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 67108864) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } } // Otherwise use a recursive tree reduction. return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( - oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } From 90827c27ec1fe6e645313ea51c9608a5d9f96465 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 11:54:53 +0100 Subject: [PATCH 337/566] Apply GitHUB clang format --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 55 ++++++--------- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 3 +- .../dpcpp/parallel_backend_sycl_histogram.h | 27 ++++--- .../dpcpp/parallel_backend_sycl_reduce.h | 70 +++++++++---------- 4 files changed, 69 insertions(+), 86 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 0459126fbed..e0f7e3a4a5c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -775,9 +775,8 @@ template auto __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _Range1&& __in_rng, - _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) + _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, + _InitType __init, _BinaryOperation __binary_op, 
_Inclusive) { using _Type = typename _InitType::__value_type; @@ -799,9 +798,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size) { return __parallel_transform_scan_single_group( - __backend_tag, std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, - __binary_op, _Inclusive{}); + __backend_tag, std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); } } @@ -817,8 +815,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen return __future( __parallel_transform_scan_base( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __binary_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + ::std::forward<_Range2>(__out_rng), __binary_op, __init, // local scan unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, @@ -876,8 +874,8 @@ template auto __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _InRng&& __in_rng, - _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, + _CopyByMaskOp __copy_by_mask_op) { using _ReduceOp = ::std::plus<_Size>; using _Assigner = unseq_backend::__scan_assigner; @@ -916,8 +914,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag template auto __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, 
_ExecutionPolicy&& __exec, - _InRng&& __in_rng, - _OutRng&& __out_rng, _Size __n, _Pred __pred) + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) { using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; @@ -952,9 +949,9 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 1>; - return __parallel_scan_copy(__backend_tag, - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, CreateOp{__pred}, CopyOp{}); + return __parallel_scan_copy(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, + CreateOp{__pred}, CopyOp{}); } } @@ -1206,8 +1203,7 @@ class __or_policy_wrapper template bool __parallel_or(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _Iterator1 __first, - _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); @@ -1249,8 +1245,7 @@ class __find_policy_wrapper template _Iterator1 __parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _Iterator1 __first, - _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst) + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); @@ -1272,8 +1267,7 @@ __parallel_find(oneapi::dpl::__internal::__device_backend_tag 
__backend_tag, _Ex template _Iterator __parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _Iterator __first, - _Iterator __last, _Brick __f, _IsFirst) + _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); @@ -1814,12 +1808,10 @@ template < __is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _Range&& __rng, - _Compare, _Proj __proj) + _Range&& __rng, _Compare, _Proj __proj) { return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range>(__rng), __proj); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); } #endif @@ -1829,14 +1821,13 @@ template < !__is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _Range&& __rng, - _Compare __comp, _Proj __proj) + _Range&& __rng, _Compare __comp, _Proj __proj) { auto __cmp_f = [__comp, __proj](const auto& __a, const auto& __b) mutable { return __comp(__proj(__a), __proj(__b)); }; - return __parallel_sort_impl(__backend_tag, - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); + return __parallel_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), + __cmp_f); } //------------------------------------------------------------------------ @@ -1849,16 +1840,14 @@ __parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_t template auto __parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, 
_ExecutionPolicy&& __exec, - _Iterator __first, - _Iterator __mid, _Iterator __last, _Compare __comp) + _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp) { const auto __mid_idx = __mid - __first; auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __parallel_partial_sort_impl(__backend_tag, - ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), + return __parallel_partial_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __partial_merge_kernel{__mid_idx}, __comp); } } // namespace __par_backend_hetero diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index d3970e5f5a5..08ccc58287c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -96,8 +96,7 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& _ // TODO: check if it makes sense to move these wrappers out of backend to a common place template auto -__parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, - const _Event& __init_event, +__parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, const _Event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { static_assert(sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t), diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index 54d6e54bb8e..2e7223ec062 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -496,9 +496,8 @@ 
template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, type typename _BinHashMgr> auto __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __backend_tag, - _ExecutionPolicy&& __exec, - const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) + _ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, + _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _private_histogram_type = ::std::uint16_t; using _local_histogram_type = ::std::uint32_t; @@ -516,9 +515,8 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag { return __future( __histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, - __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), - __binhash_manager)); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, + ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } // if bins fit into SLM, use local atomics else if (__num_bins * sizeof(_local_histogram_type) + @@ -526,8 +524,8 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __local_mem_size) { return __future(__histogram_general_local_atomics<__iters_per_work_item>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, - __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, + ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } else // otherwise, use global atomics (private copies per workgroup) { @@ -537,9 +535,8 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag 
// private copies of the histogram bins in global memory. No unrolling is taken advantage of here because it // is a runtime argument. return __future(__histogram_general_private_global_atomics( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, - __iters_per_work_item, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), - __binhash_manager)); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item, + __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } } @@ -552,14 +549,14 @@ __parallel_histogram(oneapi::dpl::__internal::__device_backend_tag __backend_tag if (__input.size() < 1048576) // 2^20 { return __parallel_histogram_select_kernel( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, - ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), + ::std::forward<_Range2>(__bins), __binhash_manager); } else { return __parallel_histogram_select_kernel( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, - ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), + ::std::forward<_Range2>(__bins), __binhash_manager); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 100369680bf..da1d37e22f1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -140,9 +140,8 @@ template auto __parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, - _ExecutionPolicy&& 
__exec, - const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) + _ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, + _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _ReduceKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< @@ -150,8 +149,8 @@ __parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend return __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __iters_per_work_item, _Commutative, _ReduceKernel>()( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, - __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } // Submits the first kernel of the parallel_transform_reduce for mid-sized arrays. @@ -259,9 +258,8 @@ template auto __parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, - _ExecutionPolicy&& __exec, - _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, - _Ranges&&... __rngs) + _ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, + _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -287,8 +285,8 @@ __parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_t __n = __n_groups; // Number of preliminary results from the device kernel. 
return __parallel_transform_reduce_work_group_kernel_submitter< _Tp, __work_group_size, __iters_per_work_item_work_group_kernel, _Commutative, _ReduceWorkGroupKernel>()( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, - __reduce_op, __transform_op, __init, __temp); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, __reduce_op, __transform_op, + __init, __temp); } // General implementation using a tree reduction @@ -434,38 +432,38 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag __back if (__n <= 256) { return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 512) { return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 1024) { return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 2048) { return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, 
::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4096) { return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8192) { return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } // Use two-step tree reduction. @@ -474,44 +472,44 @@ __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag __back else if (__n <= 2097152) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4194304) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8388608) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, 
::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 16777216) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 33554432) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 67108864) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } } // Otherwise use a recursive tree reduction. 
return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( - __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, - __work_group_size, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, + __init, ::std::forward<_Ranges>(__rngs)...); } } // namespace __par_backend_hetero From 508b95d667d5c2b566f16354dd862c937d023162 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 17:15:35 +0100 Subject: [PATCH 338/566] include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h - fix self review comment: remove extra __select_backend calls --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 112 +++++------------- 1 file changed, 29 insertions(+), 83 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index ea13b6e53cf..105f805c313 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1039,14 +1039,9 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, auto __copy_last = __pattern_copy_if(__tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), - decltype(__first)>(); - //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer return __pattern_walk2( - __dispatch_tag1, - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}); } @@ -1062,21 +1057,12 @@ __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto __copy_last = - __pattern_unique_copy(oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), - decltype(__last), decltype(__copy_first)>(), - __exec, __first, __last, __copy_first, __pred); - - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), - decltype(__first)>(); - using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; + auto __copy_last = __pattern_unique_copy(__tag, __exec, __first, __last, __copy_first, __pred); //TODO: optimize copy back depending on Iterator, i.e. 
set_final_data for host iterator/pointer - return __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read_write, + return __pattern_walk2<_BackendTag, /*_IsSync=*/::std::true_type, __par_backend_hetero::access_mode::read_write, __par_backend_hetero::access_mode::read_write>( - __dispatch_tag1, - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}); } @@ -1230,7 +1216,7 @@ __pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite template void -__pattern_inplace_merge(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __middle, +__pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __middle, _Iterator __last, _Compare __comp) { using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; @@ -1245,20 +1231,16 @@ __pattern_inplace_merge(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _I auto __copy_first = __buf.get(); auto __copy_last = __copy_first + __n; - __pattern_merge(oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__middle), - decltype(__last), decltype(__copy_first)>(), - __exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), - __comp); + __pattern_merge( + __tag, __exec, 
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), __comp); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer __pattern_walk2( - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__copy_first), decltype(__copy_last), - decltype(__first)>(), - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_move<_ExecutionPolicy>{}); } @@ -1338,27 +1320,16 @@ __pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ auto __true_result = __true_buf.get(); auto __false_result = __false_buf.get(); - constexpr auto __dispatch_tag = __select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), - decltype(__true_result), decltype(__false_result)>(); - auto copy_result = - __pattern_partition_copy(__dispatch_tag, __exec, __first, __last, __true_result, __false_result, __pred); + auto copy_result = __pattern_partition_copy(__tag, __exec, __first, __last, __true_result, __false_result, __pred); auto true_count = copy_result.first - __true_result; //TODO: optimize copy back if possible (inplace, decrease number of submits) - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__true_result), - decltype(copy_result.first), decltype(__first)>(); - using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; - __pattern_walk2<__backend_tag1, 
/*_IsSync=*/::std::false_type>( - __dispatch_tag1, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, - copy_result.first, __first, __brick_move<_ExecutionPolicy>{}); - - constexpr auto __dispatch_tag2 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__false_result), - decltype(copy_result.second), decltype(__first + true_count)>(); + __pattern_walk2<_BackendTag, /*_IsSync=*/::std::false_type>( + __tag, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, copy_result.first, + __first, __brick_move<_ExecutionPolicy>{}); + __pattern_walk2( - __dispatch_tag2, - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __false_result, copy_result.second, __first + true_count, __brick_move<_ExecutionPolicy>{}); return __first + true_count; @@ -1545,35 +1516,20 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& auto __buf_first = __buf.get(); - constexpr auto __dispatch_tag1 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first), decltype(__last), - decltype(__buf_first)>(); - using __backend_tag1 = typename decltype(__dispatch_tag1)::__backend_tag; - - auto __buf_last = __pattern_walk2<__backend_tag1, /*_IsSync=*/::std::false_type>( - __dispatch_tag1, __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, - __buf_first, __brick_copy<_ExecutionPolicy>{}); + auto __buf_last = __pattern_walk2<_BackendTag, /*_IsSync=*/::std::false_type>( + __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, + __brick_copy<_ExecutionPolicy>{}); auto __buf_mid = __buf_first + __out_size; - constexpr auto __dispatch_tag11 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), - decltype(__buf_last)>(); - using __backend_tag11 = 
typename decltype(__dispatch_tag11)::__backend_tag; - __par_backend_hetero::__parallel_partial_sort( - __backend_tag11{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); - constexpr auto __dispatch_tag2 = - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_first), decltype(__buf_mid), - decltype(__out_first)>(); - return __pattern_walk2( - __dispatch_tag2, - __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), + __tag, __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), __buf_first, __buf_mid, __out_first, __brick_copy<_ExecutionPolicy>{}); } } @@ -1887,17 +1843,13 @@ __pattern_set_union(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, //1. Calc difference {2} \ {1} const auto __n_diff = - oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first2), decltype(__last2), - decltype(__first1), decltype(__last1), decltype(__buf)>(), - __exec, __first2, __last2, __first1, __last1, __buf, __comp, unseq_backend::_DifferenceTag()) - + oneapi::dpl::__internal::__pattern_hetero_set_op(__tag, __exec, __first2, __last2, __first1, __last1, __buf, + __comp, unseq_backend::_DifferenceTag()) - __buf; //2. 
Merge {1} and the difference - const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first1), - decltype(__buf), decltype(__result)>(); return oneapi::dpl::__internal::__pattern_merge( - __dispatch_tag, + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), __first1, __last1, __buf, __buf + __n_diff, __result, __comp); @@ -1974,25 +1926,19 @@ __pattern_set_symmetric_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPo //1. Calc difference {1} \ {2} const auto __n_diff_1 = oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first1), decltype(__last1), - decltype(__first2), decltype(__last2), decltype(__buf_1)>(), - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), __first1, __last1, __first2, __last2, __buf_1, __comp, unseq_backend::_DifferenceTag()) - __buf_1; //2. Calc difference {2} \ {1} const auto __n_diff_2 = oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__first2), decltype(__last2), - decltype(__first1), decltype(__last1), decltype(__buf_2)>(), - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), __first2, __last2, __first1, __last1, __buf_2, __comp, unseq_backend::_DifferenceTag()) - __buf_2; //3. 
Merge the differences - constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, decltype(__buf_1), - decltype(__buf_2), decltype(__result)>(); - return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, + return oneapi::dpl::__internal::__pattern_merge(__tag, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, __buf_1 + __n_diff_1, __buf_2, __buf_2 + __n_diff_2, __result, __comp); } From 6d04ccbc605ff4f9a79c29cd01e61a7720822376 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Mon, 19 Feb 2024 17:21:28 +0100 Subject: [PATCH 339/566] Fix an error: remove extra __pattern_replace_if and it's usage --- include/oneapi/dpl/pstl/algorithm_impl.h | 14 -------------- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 7 +++++-- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 87415af4904..008c261042b 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -174,20 +174,6 @@ __pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ForwardIt }); } -template -void -__pattern_replace_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _UnaryPredicate __pred, const _Tp& __new_value) -{ - static_assert(__is_backend_tag_v<_Tag>); - - oneapi::dpl::__internal::__pattern_walk1( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__replace_functor< - oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, - oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred)); -} - template void __pattern_walk_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h 
index 495ff06be8c..abb6d6f6c7f 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -395,8 +395,11 @@ replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator { constexpr auto __dispatch_tag = oneapi::dpl::__internal::__select_backend<_ExecutionPolicy, _ForwardIterator>(); - __pattern_replace_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - __new_value); + oneapi::dpl::__internal::__pattern_walk1( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__replace_functor< + oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, + oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred)); } template From 70cdfeba361f64bb30965aa7372c224af174ed9b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:35:46 +0100 Subject: [PATCH 340/566] include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h - implementation of __ranges::__select_backend() --- include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h index b102ab58123..9e44033a330 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h @@ -716,6 +716,15 @@ struct __get_sycl_range } }; +template +constexpr oneapi::dpl::__internal::__hetero_tag< + typename oneapi::dpl::__internal::__select_backend_for_hetero_policy_trait< + ::std::decay_t<_ExecutionPolicy>>::__backend_tag> +__select_backend() +{ + return {}; // return __hetero_tag<__device_backend_tag> or __hetero_tag<__fpga_backend_tag> +} + } // namespace __ranges } // namespace dpl } // namespace oneapi From c299b06aba91854a94207910d3bf2fdd19396d09 Mon Sep 17 00:00:00 2001 From: 
Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:37:05 +0100 Subject: [PATCH 341/566] __ranges::__pattern_transform_reduce + tag impls --- .../pstl/hetero/numeric_ranges_impl_hetero.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 42502a4b7b5..4abed65341b 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -37,11 +37,11 @@ namespace __ranges // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init, - _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { if (__rng1.empty()) return __init; @@ -62,10 +62,11 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2& // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, _BinaryOperation __binary_op, - _UnaryOperation __unary_op) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) { if (__rng.empty()) return __init; From b4f7eab4e84d595987d45e80d3aead0d267f84ae Mon Sep 17 00:00:00 
2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:37:35 +0100 Subject: [PATCH 342/566] __ranges::__pattern_transform_scan_base + tag impls --- .../dpl/pstl/hetero/numeric_ranges_impl_hetero.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 4abed65341b..9cfe02afe6e 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -87,12 +87,11 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, // transform_scan //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _InitType __init, _BinaryOperation __binary_op, _Inclusive) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) { if (__rng1.empty()) return 0; From 646c63b2d82eb63601bcd0671acf9e7243f4fef6 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:37:52 +0100 Subject: [PATCH 343/566] __ranges::__pattern_transform_scan + tag impls --- .../pstl/hetero/numeric_ranges_impl_hetero.h | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 9cfe02afe6e..4223facecfb 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ 
b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -125,12 +125,11 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex return __rng1_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _Type __init, _BinaryOperation __binary_op, _Inclusive) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; @@ -141,12 +140,11 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& } // scan without initial element -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _BinaryOperation __binary_op, _Inclusive) +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) { using _Type = oneapi::dpl::__internal::__value_t<_Range1>; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; From a41760a2f1baa5eec241550ea123cca2728a0930 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:39:09 +0100 Subject: [PATCH 344/566] __ranges::__pattern_walk_n + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index f227fd29383..e7f743c7172 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -39,9 +39,9 @@ namespace __ranges // walk_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk_n(_ExecutionPolicy&& __exec, _Function __f, _Ranges&&... __rngs) +template +void +__pattern_walk_n(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Function __f, _Ranges&&... __rngs) { auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); if (__n > 0) From 97ff7b2e5c3d07e2bf1cc53d9c2e16ca2e0bcff4 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:39:21 +0100 Subject: [PATCH 345/566] __ranges::__pattern_swap + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index e7f743c7172..6ae7d5b87f4 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -68,9 +68,9 @@ class __swap2_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_swap(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Function __f) +template +bool +__pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Function __f) { if (__rng1.size() <= __rng2.size()) { From bfec7cc04b6318ac6b6ebb78eaa73d125186e649 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> 
Date: Tue, 20 Feb 2024 09:39:32 +0100 Subject: [PATCH 346/566] __ranges::__pattern_equal + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 6ae7d5b87f4..4f08f8f9fc4 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -92,9 +92,9 @@ __pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Rang // equal //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +bool +__pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) { if (__rng1.empty() || __rng2.empty() || __rng1.size() != __rng2.size()) return false; From 2313a2bb9729559d2ea0e43826cd50fe927f3e5b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:39:46 +0100 Subject: [PATCH 347/566] __ranges::__pattern_find_if + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 4f08f8f9fc4..5cf8d4be6aa 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -114,10 +114,9 @@ __pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& // find_if //------------------------------------------------------------------------ -template 
-oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_find_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) { //trivial pre-checks if (__rng.empty()) From fbfe04f2206c4f003ba54b57c41a194d236c2fd9 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:40:00 +0100 Subject: [PATCH 348/566] __ranges::__pattern_find_end + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 5cf8d4be6aa..9f31e62c354 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -136,10 +136,10 @@ __pattern_find_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& // find_end //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng1.empty() || __rng2.empty() || __rng1.size() < __rng2.size()) From 1844c14c615b7694246bb4610371aea34c808cda Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:40:13 +0100 Subject: [PATCH 349/566] __ranges::__pattern_find_first_of + tag impls --- 
.../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 9f31e62c354..9087f68aa4f 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -166,10 +166,10 @@ __pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ // find_first_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_find_first_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng1.empty() || __rng2.empty()) From aef87590e27bb80e51e77f0c860e925678ddacce Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:40:28 +0100 Subject: [PATCH 350/566] __ranges::__pattern_any_of + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 9087f68aa4f..16d4e9410af 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -190,9 +190,9 @@ __pattern_find_first_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _R // any_of //------------------------------------------------------------------------ -template 
-oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) +template +bool +__pattern_any_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) { if (__rng.empty()) return false; From b3fb59ff5a32e3a1e656926839430a233ea70db5 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:40:41 +0100 Subject: [PATCH 351/566] __ranges::__pattern_search + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 16d4e9410af..f502d78a775 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -214,10 +214,10 @@ class equal_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng2.empty()) From b49ec8a19b2511581c1a18201dc098f3d28bddbb Mon Sep 17 00:00:00 2001 From: Sergey Kopienko <> Date: Tue, 20 Feb 2024 09:40:55 +0100 Subject: [PATCH 352/566] __ranges::__pattern_search_n + tag impls --- .../oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index f502d78a775..0919ecd644a 100644 --- 
a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -247,10 +247,10 @@ __pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ra // search_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_search_n(_ExecutionPolicy&& __exec, _Range&& __rng, _Size __count, const _Tp& __value, +template