Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify access mode in __result_and_scratch_storage methods #1909

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
cc03d10
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 17, 2024
153419e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 17, 2024
badca77
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 17, 2024
8be1ad9
__result_and_scratch.__get_result_acc - write correct access mode
SergeyKopienko Oct 17, 2024
b4876d1
__result_and_scratch.__get_scratch_acc - write correct access mode
SergeyKopienko Oct 17, 2024
33fc658
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - fix ac…
SergeyKopienko Oct 21, 2024
edb40f4
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - fix ac…
SergeyKopienko Oct 21, 2024
501fe83
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - fix ac…
SergeyKopienko Oct 21, 2024
c349a9e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
118a5e0
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
6acba1e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
e9cf231
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
5a97c2d
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
e5795b4
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
80ac2c8
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
fd4514f
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
3f69087
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
bdb6c04
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
b5f2331
Merge branch 'main' into dev/skopienko/introduce_access_mode_into_usm…
SergeyKopienko Oct 21, 2024
1caeeae
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
a75d903
Apply GitHUB clang format
SergeyKopienko Oct 21, 2024
a4835bd
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
d7b89cd
Fix compile errors in the __result_and_scratch_storage::__get_result_…
SergeyKopienko Oct 21, 2024
d7af18b
Fix compile errors in the __result_and_scratch_storage::__get_scratch…
SergeyKopienko Oct 21, 2024
1397d94
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
e11abd9
Fix review comment: the function __get_access_mode_tag doesn't requir…
SergeyKopienko Oct 21, 2024
0816553
Apply GitHUB clang format
SergeyKopienko Oct 21, 2024
8fc12c6
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
ea82f3e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - specif…
SergeyKopienko Oct 21, 2024
9be1b53
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
58ec240
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
ac35744
Fix __dpl_sycl::__no_init{} usage for sycl::access_mode::read_write mode
SergeyKopienko Oct 21, 2024
accfdc3
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
e879129
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
f52edd5
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 22, 2024
b634f44
Apply GitHUB clang format
SergeyKopienko Oct 22, 2024
1ec055d
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,8 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
// 1. Local scan on each workgroup
auto __submit_event = __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer
// TODO what is correct access mode here for __get_scratch_acc call?
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
// Is default sycl::access_mode::read_write is ok?
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
Expand All @@ -340,6 +342,8 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
auto __iters_per_single_wg = oneapi::dpl::__internal::__dpl_ceiling_div(__n_groups, __wgroup_size);
__submit_event = __exec.queue().submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__submit_event);
// TODO what is correct access mode here for __get_scratch_acc call?
// Is default sycl::access_mode::read_write is ok?
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
Expand All @@ -362,7 +366,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
auto __final_event = __exec.queue().submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__submit_event);
oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer
// TODO what is correct access mode here for __get_scratch_acc call?
// Is default sycl::access_mode::read_write is ok?
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
// TODO what is right access mode here?
// sycl::access_mode::read_write is used by default.
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
auto __res_acc = __result_and_scratch.__get_result_acc(__cgh);
__cgh.parallel_for<_PropagateScanName...>(sycl::range<1>(__n_groups * __size_per_wg), [=](auto __item) {
auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc);
Expand Down Expand Up @@ -579,7 +587,7 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W
// predicate on each element of the input range. The second half stores the index of the output
// range to copy elements of the input range.
auto __lacc = __dpl_sycl::__local_accessor<_ValueType>(sycl::range<1>{__elems_per_wg * 2}, __hdl);
auto __res_acc = __result.__get_result_acc(__hdl);
auto __res_acc = __result.__get_result_acc<sycl::access_mode::write>(__hdl);

__hdl.parallel_for<_ScanKernelName...>(
sycl::nd_range<1>(_WGSize, _WGSize), [=](sycl::nd_item<1> __self_item) {
Expand Down Expand Up @@ -1466,7 +1474,7 @@ __parallel_find_or_impl_one_wg(oneapi::dpl::__internal::__device_backend_tag, _E
// main parallel_for
auto __event = __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...);
auto __result_acc = __result_storage.__get_result_acc(__cgh);
auto __result_acc = __result_storage.__get_result_acc<sycl::access_mode::write>(__cgh);

__cgh.parallel_for<KernelName>(
sycl::nd_range</*dim=*/1>(sycl::range</*dim=*/1>(__wgroup_size), sycl::range</*dim=*/1>(__wgroup_size)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ struct __parallel_transform_reduce_small_submitter<_Tp, _Commutative, _VecSize,

sycl::event __reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer
auto __res_acc = __scratch_container.__get_result_acc(__cgh);
auto __res_acc = __scratch_container.__get_result_acc<sycl::access_mode::write>(__cgh);
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh);
__cgh.parallel_for<_Name...>(
Expand Down Expand Up @@ -208,7 +208,7 @@ struct __parallel_transform_reduce_device_kernel_submitter<_Tp, _Commutative, _V
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh);
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __temp_acc = __scratch_container.__get_scratch_acc<sycl::access_mode::write>(__cgh);
__cgh.parallel_for<_KernelName...>(
sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), sycl::range<1>(__work_group_size)),
[=](sycl::nd_item<1> __item_id) {
Expand Down Expand Up @@ -253,8 +253,10 @@ struct __parallel_transform_reduce_work_group_kernel_submitter<_Tp, _Commutative
__reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) {
__cgh.depends_on(__reduce_event);

// TODO what is correct access mode here for __get_scratch_acc call?
// Is default sycl::access_mode::read_write is ok?
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __res_acc = __scratch_container.__get_result_acc(__cgh);
auto __res_acc = __scratch_container.__get_result_acc<sycl::access_mode::write>(__cgh);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__work_group_size), __cgh);

__cgh.parallel_for<_KernelName...>(
Expand Down Expand Up @@ -358,7 +360,11 @@ struct __parallel_transform_reduce_impl
__reduce_event = __exec.queue().submit([&, __is_first, __offset_1, __offset_2, __n,
__n_groups](sycl::handler& __cgh) {
__cgh.depends_on(__reduce_event);
// TODO what is correct access mode here for __get_scratch_acc call?
// Is default sycl::access_mode::read_write is ok?
SergeyKopienko marked this conversation as resolved.
Show resolved Hide resolved
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
// TODO what is correct access mode here for __get_result_acc call?
// Is default sycl::access_mode::read_write is ok?
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
auto __res_acc = __scratch_container.__get_result_acc(__cgh);

// get an access to data under SYCL buffer
Expand All @@ -377,7 +383,8 @@ struct __parallel_transform_reduce_impl
[=](sycl::nd_item<1> __item_id) {
auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc);
auto __res_ptr =
__result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__res_acc, 2 * __n_groups);
__result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr<sycl::access_mode::write>(
__res_acc, 2 * __n_groups);
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
auto __local_idx = __item_id.get_local_id(0);
auto __group_idx = __item_id.get_group(0);
// 1. Initialization (transform part). Fill local memory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,11 @@ struct __parallel_reduce_then_scan_reduce_submitter<__sub_group_size, __max_inpu
__dpl_sycl::__local_accessor<_InitValueType> __sub_group_partials(__num_sub_groups_local, __cgh);
__cgh.depends_on(__prior_event);
oneapi::dpl::__ranges::__require_access(__cgh, __in_rng);
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __temp_acc = __scratch_container.__get_scratch_acc<sycl::access_mode::write>(__cgh);
__cgh.parallel_for<_KernelName...>(
__nd_range, [=, *this](sycl::nd_item<1> __ndi) [[sycl::reqd_sub_group_size(__sub_group_size)]] {
_InitValueType* __temp_ptr = _TmpStorageAcc::__get_usm_or_buffer_accessor_ptr(__temp_acc);
_InitValueType* __temp_ptr =
_TmpStorageAcc::__get_usm_or_buffer_accessor_ptr<sycl::access_mode::write>(__temp_acc);
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
std::size_t __group_id = __ndi.get_group(0);
__dpl_sycl::__sub_group __sub_group = __ndi.get_sub_group();
std::uint32_t __sub_group_id = __sub_group.get_group_linear_id();
Expand Down Expand Up @@ -453,8 +454,8 @@ struct __parallel_reduce_then_scan_scan_submitter<
__dpl_sycl::__local_accessor<_InitValueType> __sub_group_partials(__num_sub_groups_local + 1, __cgh);
__cgh.depends_on(__prior_event);
oneapi::dpl::__ranges::__require_access(__cgh, __in_rng, __out_rng);
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __res_acc = __scratch_container.__get_result_acc(__cgh);
auto __temp_acc = __scratch_container.__get_scratch_acc<sycl::access_mode::read_write>(__cgh);
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
auto __res_acc = __scratch_container.__get_result_acc<sycl::access_mode::write>(__cgh);

__cgh.parallel_for<_KernelName...>(
__nd_range, [=, *this] (sycl::nd_item<1> __ndi) [[sycl::reqd_sub_group_size(__sub_group_size)]] {
Expand Down
39 changes: 29 additions & 10 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,12 +478,11 @@ using __repacked_tuple_t = typename __repacked_tuple<T>::type;
template <typename _ContainerOrIterable>
using __value_t = typename __internal::__memobj_traits<_ContainerOrIterable>::value_type;

template <typename _T>
template <typename _T, sycl::access_mode _AccessMode = sycl::access_mode::read_write>
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
struct __usm_or_buffer_accessor
{
private:
using __accessor_t = sycl::accessor<_T, 1, sycl::access::mode::read_write, __dpl_sycl::__target_device,
sycl::access::placeholder::false_t>;
using __accessor_t = sycl::accessor<_T, 1, _AccessMode, __dpl_sycl::__target_device, sycl::access::placeholder::false_t>;
__accessor_t __acc;
_T* __ptr = nullptr;
bool __usm = false;
Expand All @@ -492,11 +491,12 @@ struct __usm_or_buffer_accessor
public:
// Buffer accessor
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf)
: __acc(sycl::accessor(*__sycl_buf, __cgh, sycl::read_write, __dpl_sycl::__no_init{}))
: __acc(sycl::accessor(*__sycl_buf, __cgh, __get_access_mode_tag(), __dpl_sycl::__no_init{}))
{
}
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf, size_t __acc_offset)
: __acc(sycl::accessor(*__sycl_buf, __cgh, sycl::read_write, __dpl_sycl::__no_init{})), __offset(__acc_offset)
: __acc(sycl::accessor(*__sycl_buf, __cgh, __get_access_mode_tag(), __dpl_sycl::__no_init{})),
__offset(__acc_offset)
{
}

Expand All @@ -512,6 +512,23 @@ struct __usm_or_buffer_accessor
{
return __usm ? __ptr + __offset : &__acc[__offset];
}

private:

static auto __get_access_mode_tag()
{
if constexpr (_AccessMode == sycl::access::mode::read)
return sycl::read;

else if constexpr (_AccessMode == sycl::access::mode::write)
return sycl::write;

else if constexpr (_AccessMode == sycl::access::mode::read_write)
return sycl::read_write;

else
static_assert(false, "Unknown _AccessMode state");
}
danhoeflinger marked this conversation as resolved.
Show resolved Hide resolved
};

template <typename _ExecutionPolicy, typename _T>
Expand Down Expand Up @@ -611,27 +628,29 @@ struct __result_and_scratch_storage
#endif
}

template <sycl::access_mode _AccessMode = sycl::access_mode::read_write>
auto
__get_result_acc(sycl::handler& __cgh) const
{
#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT
if (__use_USM_host && __supports_USM_device)
return __usm_or_buffer_accessor<_T>(__cgh, __result_buf.get());
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __result_buf.get());
else if (__supports_USM_device)
return __usm_or_buffer_accessor<_T>(__cgh, __scratch_buf.get(), __scratch_n);
return __usm_or_buffer_accessor<_T>(__cgh, __sycl_buf.get(), __scratch_n);
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __scratch_buf.get(), __scratch_n);
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __sycl_buf.get(), __scratch_n);
#else
return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{});
#endif
}

template <sycl::access_mode _AccessMode = sycl::access_mode::read_write>
auto
__get_scratch_acc(sycl::handler& __cgh) const
{
#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT
if (__use_USM_host || __supports_USM_device)
return __usm_or_buffer_accessor<_T>(__cgh, __scratch_buf.get());
return __usm_or_buffer_accessor<_T>(__cgh, __sycl_buf.get());
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __scratch_buf.get());
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __sycl_buf.get());
#else
return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{});
#endif
Expand Down
Loading