Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify access mode in __result_and_scratch_storage methods #1909

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
cc03d10
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 17, 2024
153419e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 17, 2024
badca77
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 17, 2024
8be1ad9
__result_and_scratch.__get_result_acc - write correct access mode
SergeyKopienko Oct 17, 2024
b4876d1
__result_and_scratch.__get_scratch_acc - write correct access mode
SergeyKopienko Oct 17, 2024
33fc658
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - fix ac…
SergeyKopienko Oct 21, 2024
edb40f4
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - fix ac…
SergeyKopienko Oct 21, 2024
501fe83
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - fix ac…
SergeyKopienko Oct 21, 2024
c349a9e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
118a5e0
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
6acba1e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
e9cf231
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
5a97c2d
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
e5795b4
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
80ac2c8
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
fd4514f
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
3f69087
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
bdb6c04
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
b5f2331
Merge branch 'main' into dev/skopienko/introduce_access_mode_into_usm…
SergeyKopienko Oct 21, 2024
1caeeae
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
a75d903
Apply GitHUB clang format
SergeyKopienko Oct 21, 2024
a4835bd
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
d7b89cd
Fix compile errors in the __result_and_scratch_storage::__get_result_…
SergeyKopienko Oct 21, 2024
d7af18b
Fix compile errors in the __result_and_scratch_storage::__get_scratch…
SergeyKopienko Oct 21, 2024
1397d94
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
e11abd9
Fix review comment: the function __get_access_mode_tag doesn't requir…
SergeyKopienko Oct 21, 2024
0816553
Apply GitHUB clang format
SergeyKopienko Oct 21, 2024
8fc12c6
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
ea82f3e
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h - specif…
SergeyKopienko Oct 21, 2024
9be1b53
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h -…
SergeyKopienko Oct 21, 2024
58ec240
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
ac35744
Fix __dpl_sycl::__no_init{} usage for sycl::access_mode::read_write mode
SergeyKopienko Oct 21, 2024
accfdc3
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_the…
SergeyKopienko Oct 21, 2024
e879129
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 21, 2024
f52edd5
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 22, 2024
b634f44
Apply GitHUB clang format
SergeyKopienko Oct 22, 2024
1ec055d
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,8 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
// 1. Local scan on each workgroup
auto __submit_event = __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
auto __temp_acc = __result_and_scratch.template __get_scratch_acc<sycl::access_mode::write>(
__cgh, __dpl_sycl::__no_init{});
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__kernel_1.get_kernel_bundle());
Expand All @@ -340,7 +341,7 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
auto __iters_per_single_wg = oneapi::dpl::__internal::__dpl_ceiling_div(__n_groups, __wgroup_size);
__submit_event = __exec.queue().submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__submit_event);
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
auto __temp_acc = __result_and_scratch.template __get_scratch_acc<sycl::access_mode::read_write>(__cgh);
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__kernel_2.get_kernel_bundle());
Expand All @@ -362,8 +363,9 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
auto __final_event = __exec.queue().submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__submit_event);
oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh);
auto __res_acc = __result_and_scratch.__get_result_acc(__cgh);
auto __temp_acc = __result_and_scratch.template __get_scratch_acc<sycl::access_mode::read>(__cgh);
auto __res_acc = __result_and_scratch.template __get_result_acc<sycl::access_mode::write>(
__cgh, __dpl_sycl::__no_init{});
__cgh.parallel_for<_PropagateScanName...>(sycl::range<1>(__n_groups * __size_per_wg), [=](auto __item) {
auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc);
auto __res_ptr =
Expand Down Expand Up @@ -579,7 +581,8 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W
// predicate on each element of the input range. The second half stores the index of the output
// range to copy elements of the input range.
auto __lacc = __dpl_sycl::__local_accessor<_ValueType>(sycl::range<1>{__elems_per_wg * 2}, __hdl);
auto __res_acc = __result.__get_result_acc(__hdl);
auto __res_acc =
__result.template __get_result_acc<sycl::access_mode::write>(__hdl, __dpl_sycl::__no_init{});

__hdl.parallel_for<_ScanKernelName...>(
sycl::nd_range<1>(_WGSize, _WGSize), [=](sycl::nd_item<1> __self_item) {
Expand Down Expand Up @@ -1466,7 +1469,8 @@ __parallel_find_or_impl_one_wg(oneapi::dpl::__internal::__device_backend_tag, _E
// main parallel_for
auto __event = __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...);
auto __result_acc = __result_storage.__get_result_acc(__cgh);
auto __result_acc =
__result_storage.template __get_result_acc<sycl::access_mode::write>(__cgh, __dpl_sycl::__no_init{});

__cgh.parallel_for<KernelName>(
sycl::nd_range</*dim=*/1>(sycl::range</*dim=*/1>(__wgroup_size), sycl::range</*dim=*/1>(__wgroup_size)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ struct __parallel_transform_reduce_small_submitter<_Tp, _Commutative, _VecSize,

sycl::event __reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer
auto __res_acc = __scratch_container.__get_result_acc(__cgh);
auto __res_acc =
__scratch_container.template __get_result_acc<sycl::access_mode::write>(__cgh, __dpl_sycl::__no_init{});
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh);
__cgh.parallel_for<_Name...>(
Expand Down Expand Up @@ -208,7 +209,8 @@ struct __parallel_transform_reduce_device_kernel_submitter<_Tp, _Commutative, _V
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh);
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __temp_acc = __scratch_container.template __get_scratch_acc<sycl::access_mode::write>(
__cgh, __dpl_sycl::__no_init{});
__cgh.parallel_for<_KernelName...>(
sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), sycl::range<1>(__work_group_size)),
[=](sycl::nd_item<1> __item_id) {
Expand Down Expand Up @@ -253,8 +255,9 @@ struct __parallel_transform_reduce_work_group_kernel_submitter<_Tp, _Commutative
__reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) {
__cgh.depends_on(__reduce_event);

auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __res_acc = __scratch_container.__get_result_acc(__cgh);
auto __temp_acc = __scratch_container.template __get_scratch_acc<sycl::access_mode::read>(__cgh);
auto __res_acc =
__scratch_container.template __get_result_acc<sycl::access_mode::write>(__cgh, __dpl_sycl::__no_init{});
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__work_group_size), __cgh);

__cgh.parallel_for<_KernelName...>(
Expand Down Expand Up @@ -358,8 +361,10 @@ struct __parallel_transform_reduce_impl
__reduce_event = __exec.queue().submit([&, __is_first, __offset_1, __offset_2, __n,
__n_groups](sycl::handler& __cgh) {
__cgh.depends_on(__reduce_event);
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __res_acc = __scratch_container.__get_result_acc(__cgh);
auto __temp_acc = __scratch_container.template __get_scratch_acc<sycl::access_mode::read_write>(
__cgh, __is_first ? sycl::property_list{__dpl_sycl::__no_init{}} : sycl::property_list{});
auto __res_acc = __scratch_container.template __get_result_acc<sycl::access_mode::write>(
__cgh, __dpl_sycl::__no_init{});

// get an access to data under SYCL buffer
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ struct __parallel_reduce_then_scan_reduce_submitter<__sub_group_size, __max_inpu
__dpl_sycl::__local_accessor<_InitValueType> __sub_group_partials(__num_sub_groups_local, __cgh);
__cgh.depends_on(__prior_event);
oneapi::dpl::__ranges::__require_access(__cgh, __in_rng);
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __temp_acc = __scratch_container.template __get_scratch_acc<sycl::access_mode::write>(
__cgh, __dpl_sycl::__no_init{});
__cgh.parallel_for<_KernelName...>(
__nd_range, [=, *this](sycl::nd_item<1> __ndi) [[sycl::reqd_sub_group_size(__sub_group_size)]] {
_InitValueType* __temp_ptr = _TmpStorageAcc::__get_usm_or_buffer_accessor_ptr(__temp_acc);
Expand Down Expand Up @@ -453,8 +454,9 @@ struct __parallel_reduce_then_scan_scan_submitter<
__dpl_sycl::__local_accessor<_InitValueType> __sub_group_partials(__num_sub_groups_local + 1, __cgh);
__cgh.depends_on(__prior_event);
oneapi::dpl::__ranges::__require_access(__cgh, __in_rng, __out_rng);
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh);
auto __res_acc = __scratch_container.__get_result_acc(__cgh);
auto __temp_acc = __scratch_container.template __get_scratch_acc<sycl::access_mode::read_write>(__cgh);
auto __res_acc =
__scratch_container.template __get_result_acc<sycl::access_mode::write>(__cgh, __dpl_sycl::__no_init{});

__cgh.parallel_for<_KernelName...>(
__nd_range, [=, *this] (sycl::nd_item<1> __ndi) [[sycl::reqd_sub_group_size(__sub_group_size)]] {
Expand Down
49 changes: 30 additions & 19 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,31 +478,35 @@ using __repacked_tuple_t = typename __repacked_tuple<T>::type;
template <typename _ContainerOrIterable>
using __value_t = typename __internal::__memobj_traits<_ContainerOrIterable>::value_type;

template <typename _T>
template <typename _Accessor>
struct __usm_or_buffer_accessor
{
private:
using __accessor_t = sycl::accessor<_T, 1, sycl::access::mode::read_write, __dpl_sycl::__target_device,
sycl::access::placeholder::false_t>;
__accessor_t __acc;
using _T = std::decay_t<typename _Accessor::value_type>;
_Accessor __acc;
_T* __ptr = nullptr;
bool __usm = false;
size_t __offset = 0;

public:
// Buffer accessor
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf)
: __acc(sycl::accessor(*__sycl_buf, __cgh, sycl::read_write, __dpl_sycl::__no_init{}))
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf,
const sycl::property_list& __prop_list)
: __acc(*__sycl_buf, __cgh, __prop_list)
{
}
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf, size_t __acc_offset)
: __acc(sycl::accessor(*__sycl_buf, __cgh, sycl::read_write, __dpl_sycl::__no_init{})), __offset(__acc_offset)
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf, size_t __acc_offset,
const sycl::property_list& __prop_list)
: __acc(*__sycl_buf, __cgh, __prop_list), __offset(__acc_offset)
{
}

// USM pointer
__usm_or_buffer_accessor(sycl::handler& __cgh, _T* __usm_buf) : __ptr(__usm_buf), __usm(true) {}
__usm_or_buffer_accessor(sycl::handler& __cgh, _T* __usm_buf, size_t __ptr_offset)
__usm_or_buffer_accessor(sycl::handler& __cgh, _T* __usm_buf, const sycl::property_list&)
: __ptr(__usm_buf), __usm(true)
{
}
__usm_or_buffer_accessor(sycl::handler& __cgh, _T* __usm_buf, size_t __ptr_offset, const sycl::property_list&)
: __ptr(__usm_buf), __usm(true), __offset(__ptr_offset)
{
}
Expand All @@ -520,6 +524,10 @@ struct __result_and_scratch_storage
private:
using __sycl_buffer_t = sycl::buffer<_T, 1>;

template <sycl::access_mode _AccessMode>
using __accessor_t =
sycl::accessor<_T, 1, _AccessMode, __dpl_sycl::__target_device, sycl::access::placeholder::false_t>;

_ExecutionPolicy __exec;
std::shared_ptr<_T> __scratch_buf;
std::shared_ptr<_T> __result_buf;
Expand Down Expand Up @@ -611,29 +619,32 @@ struct __result_and_scratch_storage
#endif
}

template <sycl::access_mode _AccessMode = sycl::access_mode::read_write>
auto
__get_result_acc(sycl::handler& __cgh) const
__get_result_acc(sycl::handler& __cgh, const sycl::property_list& __prop_list = {}) const
{
#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT
if (__use_USM_host && __supports_USM_device)
return __usm_or_buffer_accessor<_T>(__cgh, __result_buf.get());
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __result_buf.get(), __prop_list);
else if (__supports_USM_device)
return __usm_or_buffer_accessor<_T>(__cgh, __scratch_buf.get(), __scratch_n);
return __usm_or_buffer_accessor<_T>(__cgh, __sycl_buf.get(), __scratch_n);
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __scratch_buf.get(), __scratch_n,
__prop_list);
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __sycl_buf.get(), __scratch_n, __prop_list);
#else
return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{});
return __accessor_t<_AccessMode>(*__sycl_buf.get(), __cgh, __prop_list);
#endif
}

template <sycl::access_mode _AccessMode = sycl::access_mode::read_write>
auto
__get_scratch_acc(sycl::handler& __cgh) const
__get_scratch_acc(sycl::handler& __cgh, const sycl::property_list& __prop_list = {}) const
{
#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT
if (__use_USM_host || __supports_USM_device)
return __usm_or_buffer_accessor<_T>(__cgh, __scratch_buf.get());
return __usm_or_buffer_accessor<_T>(__cgh, __sycl_buf.get());
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __scratch_buf.get(), __prop_list);
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __sycl_buf.get(), __prop_list);
#else
return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{});
return __accessor_t<_AccessMode>(*__sycl_buf.get(), __cgh, __prop_list);
#endif
}

Expand Down
Loading