Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relocate __lazy_ctor_storage to utils header #1769

Merged
merged 5 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,6 @@ class __reduce_mid_work_group_kernel;
template <typename... _Name>
class __reduce_kernel;

// Storage helper since _Tp may not have a default constructor.
julianmi marked this conversation as resolved.
Show resolved Hide resolved
template <typename _Tp>
union __lazy_ctor_storage
{
    // User-provided and intentionally empty, so that creating the storage does not construct __v.
    __lazy_ctor_storage() {}

    // The wrapped value. The owner constructs it in place (placement new) and destroys it explicitly.
    _Tp __v;
};

// Adjust number of sequential operations per work-item based on the vector size. Single elements are kept to
// improve performance of small arrays or remainder loops.
template <std::uint8_t _VecSize, typename _Size>
Expand All @@ -76,7 +68,7 @@ __work_group_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Si
{
auto __local_idx = __item_id.get_local_id(0);
const _Size __group_size = __item_id.get_local_range().size();
__lazy_ctor_storage<_Tp> __result;
oneapi::dpl::__internal::__lazy_ctor_storage<_Tp> __result;
// 1. Initialization (transform part). Fill local memory
__transform_pattern(__item_id, __n, __iters_per_work_item, /*global_offset*/ (_Size)0, __is_full,
/*__n_groups*/ (_Size)1, __result, __acc...);
Expand All @@ -89,7 +81,7 @@ __work_group_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Si
__reduce_pattern.apply_init(__init, __result.__v);
__res_ptr[0] = __result.__v;
}
__result.__v.~_Tp();
__result.__destroy();
}

// Device kernel that transforms and reduces __n elements to the number of work groups preliminary results.
Expand All @@ -104,7 +96,7 @@ __device_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Size _
auto __local_idx = __item_id.get_local_id(0);
auto __group_idx = __item_id.get_group(0);
const _Size __group_size = __item_id.get_local_range().size();
__lazy_ctor_storage<_Tp> __result;
oneapi::dpl::__internal::__lazy_ctor_storage<_Tp> __result;
// 1. Initialization (transform part). Fill local memory
__transform_pattern(__item_id, __n, __iters_per_work_item, /*global_offset*/ (_Size)0, __is_full, __n_groups,
__result, __acc...);
Expand All @@ -114,7 +106,7 @@ __device_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Size _
__result.__v = __reduce_pattern(__item_id, __n_items, __result.__v, __local_mem);
if (__local_idx == 0)
__temp_acc[__group_idx] = __result.__v;
__result.__v.~_Tp();
__result.__destroy();
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -394,7 +386,7 @@ struct __parallel_transform_reduce_impl
// 1. Initialization (transform part). Fill local memory
_Size __n_items;
const bool __is_full = __n == __size_per_work_group * __n_groups;
__lazy_ctor_storage<_Tp> __result;
oneapi::dpl::__internal::__lazy_ctor_storage<_Tp> __result;
if (__is_first)
{
__transform_pattern1(__item_id, __n, __iters_per_work_item, /*global_offset*/ (_Size)0,
Expand All @@ -420,7 +412,7 @@ struct __parallel_transform_reduce_impl

__temp_ptr[__offset_1 + __group_idx] = __result.__v;
}
__result.__v.~_Tp();
__result.__destroy();
});
});
__is_first = false;
Expand Down
6 changes: 3 additions & 3 deletions include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ struct transform_reduce
void
vectorized_reduction_first(const _Size __start_idx, _Res& __res, const _Acc&... __acc) const
{
new (&__res.__v) _Tp(__unary_op(__start_idx, __acc...));
__res.__setup(__unary_op(__start_idx, __acc...));
_ONEDPL_PRAGMA_UNROLL
for (_Size __i = 1; __i < _VecSize; ++__i)
__res.__v = __binary_op(__res.__v, __unary_op(__start_idx + __i, __acc...));
Expand Down Expand Up @@ -251,7 +251,7 @@ struct transform_reduce
return;
if (__iters_per_work_item == 1)
{
new (&__res.__v) _Tp(__unary_op(__global_idx, __acc...));
__res.__setup(__unary_op(__global_idx, __acc...));
return;
}
const _Size __local_range = __item_id.get_local_range(0);
Expand Down Expand Up @@ -318,7 +318,7 @@ struct transform_reduce
// Scalar remainder
else if (__adjusted_global_id < __adjusted_n)
{
new (&__res.__v) _Tp(__unary_op(__adjusted_global_id, __acc...));
__res.__setup(__unary_op(__adjusted_global_id, __acc...));
const _Size __adjusted_global_id_plus_one = __adjusted_global_id + 1;
scalar_reduction_remainder(__adjusted_global_id_plus_one, __adjusted_n, __res, __acc...);
}
Expand Down
21 changes: 21 additions & 0 deletions include/oneapi/dpl/pstl/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,27 @@ struct __is_iterator_type<_T, std::void_t<typename std::iterator_traits<_T>::dif
// Variable-template shorthand for __is_iterator_type<_T>::value.
template <typename _T>
static constexpr bool __is_iterator_type_v = __is_iterator_type<_T>::value;

// Storage helper since _Tp may not have a default constructor.
//
// Wrapping _Tp in a union defers its construction: the empty union constructor leaves __v unconstructed, so the
// owner is responsible for calling __setup() before reading __v and __destroy() once the value is no longer needed.
// NOTE: no destructor is declared here, so for a _Tp with a non-trivial destructor the implicit destructor of the
// union is deleted and such a specialization cannot be used for automatic variables.
template <typename _Tp>
union __lazy_ctor_storage
{
    using __value_type = _Tp;
    _Tp __v;
    // Intentionally empty: __v is not constructed here.
    __lazy_ctor_storage() {}

    // Constructs __v in place from __init, which is perfectly forwarded to the selected _Tp constructor.
    template <typename _U>
    void
    __setup(_U&& __init)
    {
        // Use the global placement form with a void* target: an unqualified new-expression would look up
        // operator new in the scope of _Tp first, so a class-specific operator new declared by _Tp would hide
        // the placement overload and make this call ill-formed.
        ::new (static_cast<void*>(&__v)) _Tp(std::forward<_U>(__init));
    }
    // Ends the lifetime of __v; only valid after __v has been constructed (e.g. by __setup()).
    void
    __destroy()
    {
        __v.~_Tp();
    }
};

} // namespace __internal
} // namespace dpl
} // namespace oneapi
Expand Down
Loading