From 768ebb6cdd83cfb44f022e4a1c4db8443f73610c Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Mon, 5 Aug 2024 13:42:31 -0400 Subject: [PATCH] Revert "remove unique pattern family from reduce_then_scan" This reverts commit 4d78ec337c0977ccb1ab3c112be168d42f2fe453. --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 55 ++++------- .../hetero/algorithm_ranges_impl_hetero.h | 93 +++++++------------ .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 61 ++++++++++-- .../parallel_backend_sycl_reduce_then_scan.h | 69 ++++++++++---- .../dpl/pstl/hetero/dpcpp/sycl_traits.h | 9 ++ .../device_copyable.pass.cpp | 10 ++ test/support/utils_device_copyable.h | 31 +++++++ 7 files changed, 210 insertions(+), 118 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 9629cf5dee2..300681a76da 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -885,33 +885,6 @@ __pattern_mismatch(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterat // copy_if //------------------------------------------------------------------------ -template -::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type> -__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, - _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) -{ - using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - - if (__first == __last) - return ::std::make_pair(__output_first, _It1DifferenceType{0}); - - _It1DifferenceType __n = __last - __first; - - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); - auto __buf2 = __keep2(__output_first, __output_first + __n); - - auto __res = __par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __n, __create_mask_op, - __copy_by_mask_op); - - ::std::size_t __num_copied = __res.get(); - return ::std::make_pair(__output_first + __n, __num_copied); -} - template _Iterator2 @@ -982,16 +955,28 @@ __pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec _Iterator2 __result_first, _BinaryPredicate __pred) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign, - /*inclusive*/ ::std::true_type, 1> - __copy_by_mask_op; - __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ - __not_pred<_BinaryPredicate>{__pred}}; - auto __result = __pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __result_first, __create_mask_op, __copy_by_mask_op); + _It1DifferenceType __n = __last - __first; + + if (__n == 0) + return __result_first; + if (__n == 1) + { + oneapi::dpl::__internal::__pattern_walk2_brick( + __hetero_tag<_BackendTag>{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + return __result_first + 1; + } + + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __buf2 = __keep2(__result_first, __result_first + __n); + + auto __result = oneapi::dpl::__par_backend_hetero::__parallel_unique_copy( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __pred); - return __result_first + __result.second; + return __result_first + __result.get(); } template diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 9bf1b673cba..4c42db598d9 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -334,52 +334,6 @@ __pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& _ // copy_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__difference_t<_Range1> -__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) -{ - if (__rng1.size() == 0) - return __rng1.size(); - - using _SizeType = decltype(__rng1.size()); - using _ReduceOp = ::std::plus<_SizeType>; - using _Assigner = unseq_backend::__scan_assigner; - using _NoAssign = unseq_backend::__scan_no_assign; - using _MaskAssigner = unseq_backend::__mask_assigner<1>; - using _InitType = unseq_backend::__no_init_value<_SizeType>; - using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; - - _Assigner __assign_op; - _ReduceOp __reduce_op; - _DataAcc __get_data_op; - _MaskAssigner __add_mask_op; - - oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __rng1.size()); - - auto __res = - __par_backend_hetero::__parallel_transform_scan_base( - _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__ranges::zip_view( - __rng1, oneapi::dpl::__ranges::all_view( - __mask_buf.get_buffer())), - __rng2, __reduce_op, _InitType{}, - // local scan - unseq_backend::__scan{__reduce_op, __get_data_op, __assign_op, - __add_mask_op, __create_mask_op}, - // scan between groups - unseq_backend::__scan{__reduce_op, __get_data_op, _NoAssign{}, __assign_op, - __get_data_op}, - // global scan - __copy_by_mask_op) - .get(); - - return __res; -} - template oneapi::dpl::__internal::__difference_t<_Range2> @@ -429,27 +383,45 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, // unique_copy //------------------------------------------------------------------------ +template +struct __copy_wrapper; + template oneapi::dpl::__internal::__difference_t<_Range2> __pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, - _BinaryPredicate __pred, _Assign) + _BinaryPredicate __pred, _Assign&& __assign) { - using _It1DifferenceType = oneapi::dpl::__internal::__difference_t<_Range1>; - unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, _Assign, /*inclusive*/ ::std::true_type, 1> - __copy_by_mask_op; - __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ - __not_pred<_BinaryPredicate>{__pred}}; - - return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng), ::std::forward<_Range2>(__result), - __create_mask_op, __copy_by_mask_op); + auto __n = __rng.size(); + if (__n == 0) + return 0; + if (__n == 1) + { + using CopyBrick = oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>; + oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), + unseq_backend::walk_n<_ExecutionPolicy, CopyBrick>{CopyBrick{}}, __n, std::forward<_Range1>(__rng), + std::forward<_Range2>(__result)) + .get(); + + return 1; + } + + return oneapi::dpl::__par_backend_hetero::__parallel_unique_copy( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng), + std::forward<_Range2>(__result), __pred, std::forward<_Assign>(__assign)) + .get(); } //------------------------------------------------------------------------ // unique //------------------------------------------------------------------------ +template +struct __unique_wrapper; + template oneapi::dpl::__internal::__difference_t<_Range> __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) @@ -461,10 +433,13 @@ __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ra oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __rng.size()); auto res_rng = oneapi::dpl::__ranges::views::all(__buf.get_buffer()); - auto res = __ranges::__pattern_unique_copy(__tag, __exec, __rng, res_rng, __pred, - oneapi::dpl::__internal::__pstl_assign()); + auto res = __ranges::__pattern_unique_copy( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__unique_wrapper>(__exec), __rng, res_rng, __pred, + oneapi::dpl::__internal::__pstl_assign()); - __ranges::__pattern_walk_n(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __ranges::__pattern_walk_n(__tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, res_rng, ::std::forward<_Range>(__rng)); return res; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index a5a370940d1..8d4368fb093 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -810,6 +810,19 @@ struct __gen_mask _Predicate __pred; }; +template +struct __gen_unique_mask +{ + template + bool + operator()(_InRng&& __in_rng, std::size_t __idx) const + { + //starting index is offset to 1 for "unique" patterns and 0th element copy is handled separately + return !__pred(__in_rng[__idx], __in_rng[__idx - 1]); + } + _BinaryPredicate __pred; +}; + template struct __gen_count_mask { @@ -928,7 +941,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen return __parallel_transform_reduce_then_scan( __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng), std::forward<_Range2>(__out_rng), __gen_transform, __binary_op, __gen_transform, _ScanInputTransform{}, - _WriteOp{}, __init, _Inclusive{}); + _WriteOp{}, __init, _Inclusive{}, + /*_IsUniquePattern=*/std::false_type{}); } } { @@ -998,11 +1012,11 @@ struct __invoke_single_group_copy_if }; template + typename _WriteOp, typename _IsUniquePattern> auto __parallel_reduce_then_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _GenMask __generate_mask, - _WriteOp __write_op) + _WriteOp __write_op, _IsUniquePattern __is_unique_pattern) { using _GenReduceInput = oneapi::dpl::__par_backend_hetero::__gen_count_mask<_GenMask>; using _ReduceOp = std::plus<_Size>; @@ -1013,7 +1027,7 @@ __parallel_reduce_then_scan_copy(oneapi::dpl::__internal::__device_backend_tag _ __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), _GenReduceInput{__generate_mask}, _ReduceOp{}, _GenScanInput{__generate_mask}, _ScanInputTransform{}, __write_op, oneapi::dpl::unseq_backend::__no_init_value<_Size>{}, - /*_Inclusive=*/std::true_type{}); + /*_Inclusive=*/std::true_type{}, __is_unique_pattern); } template +auto +__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred, + _Assign&& __assign = oneapi::dpl::__internal::__pstl_assign{}) +{ + + auto __n = __rng.size(); + if (oneapi::dpl::__par_backend_hetero::__prefer_reduce_then_scan(__exec)) + { + using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_unique_mask<_BinaryPredicate>; + using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_idx_if<1, _Assign>; + + return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), + std::forward<_Range1>(__rng), std::forward<_Range2>(__result), __n, + _GenMask{__pred}, _WriteOp{std::forward<_Assign>(__assign)}, + /*_IsUniquePattern=*/std::true_type{}); + } + else + { + + using _ReduceOp = std::plus; + using _CreateOp = oneapi::dpl::__internal::__create_mask_unique_copy, + decltype(__n)>; + using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ std::true_type, 1>; + + return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng), + std::forward<_Range2>(__result), __n, + _CreateOp{oneapi::dpl::__internal::__not_pred<_BinaryPredicate>{__pred}}, + _CopyOp{_ReduceOp{}, std::forward<_Assign>(__assign)}); + } +} + template auto __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, @@ -1071,7 +1119,7 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng), std::forward<_Range2>(__result), __n, - _GenMask{__pred}, _WriteOp{}); + _GenMask{__pred}, _WriteOp{}, /*_IsUniquePattern=*/std::false_type{}); } else { @@ -1122,7 +1170,8 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, - _GenMask{__pred}, _WriteOp{std::forward<_Assign>(__assign)}); + _GenMask{__pred}, _WriteOp{std::forward<_Assign>(__assign)}, + /*Unique=*/std::false_type{}); } else { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h index 06dcc9f21cf..348c010d0ad 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h @@ -266,13 +266,15 @@ template class __reduce_then_scan_scan_kernel; template + bool __is_unique_pattern_v, typename _GenReduceInput, typename _ReduceOp, typename _InitType, + typename _KernelName> struct __parallel_reduce_then_scan_reduce_submitter; template + bool __is_unique_pattern_v, typename _GenReduceInput, typename _ReduceOp, typename _InitType, + typename... _KernelName> struct __parallel_reduce_then_scan_reduce_submitter<__sub_group_size, __max_inputs_per_item, __is_inclusive, - _GenReduceInput, _ReduceOp, _InitType, + __is_unique_pattern_v, _GenReduceInput, _ReduceOp, _InitType, __internal::__optional_kernel_name<_KernelName...>> { // Step 1 - SubGroupReduce is expected to perform sub-group reductions to global memory @@ -302,7 +304,11 @@ struct __parallel_reduce_then_scan_reduce_submitter<__sub_group_size, __max_inpu oneapi::dpl::__internal::__lazy_ctor_storage<_InitValueType> __sub_group_carry; std::size_t __group_start_idx = (__block_num * __max_block_size) + (__g * __inputs_per_sub_group * __num_sub_groups_local); - + if constexpr (__is_unique_pattern_v) + { + // for unique patterns, the first element is always copied to the output, so we need to skip it + __group_start_idx += 1; + } std::size_t __elements_in_group = std::min(__n - __group_start_idx, std::size_t(__num_sub_groups_local * __inputs_per_sub_group)); std::uint32_t __active_subgroups = @@ -397,16 +403,16 @@ struct __parallel_reduce_then_scan_reduce_submitter<__sub_group_size, __max_inpu }; template + bool __is_unique_pattern_v, typename _GenReduceInput, typename _ReduceOp, typename _GenScanInput, + typename _ScanInputTransform, typename _WriteOp, typename _InitType, typename _KernelName> struct __parallel_reduce_then_scan_scan_submitter; template + bool __is_unique_pattern_v, typename _GenReduceInput, typename _ReduceOp, typename _GenScanInput, + typename _ScanInputTransform, typename _WriteOp, typename _InitType, typename... _KernelName> struct __parallel_reduce_then_scan_scan_submitter< - __sub_group_size, __max_inputs_per_item, __is_inclusive, _GenReduceInput, _ReduceOp, _GenScanInput, - _ScanInputTransform, _WriteOp, _InitType, __internal::__optional_kernel_name<_KernelName...>> + __sub_group_size, __max_inputs_per_item, __is_inclusive, __is_unique_pattern_v, _GenReduceInput, _ReduceOp, + _GenScanInput, _ScanInputTransform, _WriteOp, _InitType, __internal::__optional_kernel_name<_KernelName...>> { template @@ -455,6 +461,11 @@ struct __parallel_reduce_then_scan_scan_submitter< auto __group_start_idx = (__block_num * __max_block_size) + (__g * __inputs_per_sub_group * __num_sub_groups_local); + if constexpr (__is_unique_pattern_v) + { + // for unique patterns, the first element is always copied to the output, so we need to skip it + __group_start_idx += 1; + } std::size_t __elements_in_group = std::min(__n - __group_start_idx, std::size_t(__num_sub_groups_local * __inputs_per_sub_group)); @@ -600,6 +611,15 @@ struct __parallel_reduce_then_scan_scan_submitter< } else // zeroth block, group and subgroup { + if constexpr (__is_unique_pattern_v) + { + if (__sub_group_local_id == 0) + { + // For unique patterns, always copy the 0th element to the output + __write_op.__assign(__in_rng[0], __out_rng[0]); + } + } + if constexpr (std::is_same_v<_InitType, oneapi::dpl::unseq_backend::__no_init_value<_InitValueType>>) { @@ -662,7 +682,14 @@ struct __parallel_reduce_then_scan_scan_submitter< { if (__block_num + 1 == __num_blocks) { - __res_ptr[0] = __sub_group_carry.__v; + if constexpr (__is_unique_pattern_v) + { + __res_ptr[0] = __sub_group_carry.__v + 1; + } + else + { + __res_ptr[0] = __sub_group_carry.__v; + } } else { @@ -712,13 +739,13 @@ __prefer_reduce_then_scan(const _ExecutionPolicy& __exec) // and performs the final write to output operation template + typename _Inclusive, typename _IsUniquePattern> auto __parallel_transform_reduce_then_scan(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _GenReduceInput __gen_reduce_input, _ReduceOp __reduce_op, _GenScanInput __gen_scan_input, _ScanInputTransform __scan_input_transform, _WriteOp __write_op, _InitType __init, - _Inclusive) + _Inclusive, _IsUniquePattern) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _ReduceKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< @@ -731,6 +758,7 @@ __parallel_transform_reduce_then_scan(oneapi::dpl::__internal::__device_backend_ // Empirically determined maximum. May be less for non-full blocks. constexpr std::uint8_t __max_inputs_per_item = 128; constexpr bool __inclusive = _Inclusive::value; + constexpr bool __is_unique_pattern_v = _IsUniquePattern::value; // TODO: This min call is temporary until PR #1683 is merged. const std::size_t __work_group_size = @@ -745,8 +773,12 @@ __parallel_transform_reduce_then_scan(oneapi::dpl::__internal::__device_backend_ const std::size_t __n = __in_rng.size(); const std::size_t __max_inputs_per_block = __work_group_size * __max_inputs_per_item * __num_work_groups; std::size_t __num_remaining = __n; - - // reduce_then_scan kernel is not built to handle "empty". + if constexpr (__is_unique_pattern_v) + { + // skip scan of zeroth element in unique patterns + __num_remaining -= 1; + } + // reduce_then_scan kernel is not built to handle "empty" scans which includes `__n == 1` for unique patterns. // These trivial end cases should be handled at a higher level. assert(__num_remaining > 0); auto __inputs_per_sub_group = @@ -767,11 +799,12 @@ __parallel_transform_reduce_then_scan(oneapi::dpl::__internal::__device_backend_ // Reduce and scan step implementations using _ReduceSubmitter = __parallel_reduce_then_scan_reduce_submitter<__sub_group_size, __max_inputs_per_item, __inclusive, - _GenReduceInput, _ReduceOp, _InitType, _ReduceKernel>; + __is_unique_pattern_v, _GenReduceInput, _ReduceOp, _InitType, + _ReduceKernel>; using _ScanSubmitter = __parallel_reduce_then_scan_scan_submitter<__sub_group_size, __max_inputs_per_item, __inclusive, - _GenReduceInput, _ReduceOp, _GenScanInput, _ScanInputTransform, - _WriteOp, _InitType, _ScanKernel>; + __is_unique_pattern_v, _GenReduceInput, _ReduceOp, _GenScanInput, + _ScanInputTransform, _WriteOp, _InitType, _ScanKernel>; _ReduceSubmitter __reduce_submitter{__max_inputs_per_block, __num_sub_groups_local, __num_sub_groups_global, diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index e4a61210193..9a935152446 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -239,6 +239,9 @@ struct __gen_transform_input; template struct __gen_mask; +template +struct __gen_unique_mask; + template struct __gen_count_mask; @@ -269,6 +272,12 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backen { }; +template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__gen_unique_mask, + _BinaryPredicate)> + : oneapi::dpl::__internal::__are_all_device_copyable<_BinaryPredicate> +{ +}; template struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__gen_count_mask, _GenMask)> : oneapi::dpl::__internal::__are_all_device_copyable<_GenMask> diff --git a/test/general/implementation_details/device_copyable.pass.cpp b/test/general/implementation_details/device_copyable.pass.cpp index 5f8b9c1a459..25f5fc2e608 100644 --- a/test/general/implementation_details/device_copyable.pass.cpp +++ b/test/general/implementation_details/device_copyable.pass.cpp @@ -154,6 +154,11 @@ test_device_copyable() static_assert(sycl::is_device_copyable_v>, "__gen_mask is not device copyable with device copyable types"); + //__gen_unique_mask + static_assert( + sycl::is_device_copyable_v>, + "__gen_unique_mask is not device copyable with device copyable types"); + //__gen_count_mask static_assert(sycl::is_device_copyable_v>>, @@ -381,6 +386,11 @@ test_non_device_copyable() static_assert(!sycl::is_device_copyable_v>, "__gen_mask is device copyable with non device copyable types"); + //__gen_unique_mask + static_assert(!sycl::is_device_copyable_v< + oneapi::dpl::__par_backend_hetero::__gen_unique_mask>, + "__gen_unique_mask is device copyable with non device copyable types"); + //__gen_count_mask static_assert(!sycl::is_device_copyable_v>>, diff --git a/test/support/utils_device_copyable.h b/test/support/utils_device_copyable.h index ea5d7a63240..32e02991933 100644 --- a/test/support/utils_device_copyable.h +++ b/test/support/utils_device_copyable.h @@ -73,6 +73,32 @@ struct assign_device_copyable } }; +// Device copyable binary operator binary operators. +// Intentionally non-trivially copyable to test that device_copyable speciailzation works and we are not +// relying on trivial copyability +struct binary_op_non_device_copyable +{ + binary_op_non_device_copyable(const binary_op_non_device_copyable& other) + { + std::cout << " non trivial copy ctor\n"; + } + int + operator()(int a, int b) const + { + return a; + } +}; + +struct binary_op_device_copyable +{ + binary_op_device_copyable(const binary_op_device_copyable& other) { std::cout << " non trivial copy ctor\n"; } + int + operator()(int a, int b) const + { + return a; + } +}; + // Device copyable int wrapper struct used in testing as surrogate for values, value types, etc. // Intentionally non-trivially copyable to test that device_copyable speciailzation works and we are not // relying on trivial copyability @@ -190,6 +216,11 @@ struct sycl::is_device_copyable : std::true_t { }; +template <> +struct sycl::is_device_copyable : std::true_type +{ +}; + template <> struct sycl::is_device_copyable : std::true_type {