Skip to content

Commit

Permalink
Correct preprocessor SYCL target checks for reduce and min/max element (
Browse files Browse the repository at this point in the history
#1342)

This PR corrects preprocessor checks for the transform_reduce nonsequential path that is taken on non-SPIR-V devices. Additionally, fixes are provided for kernel not found errors and a CMake bug.

Signed-off-by: Matthew Michel <[email protected]>

---------

Signed-off-by: Matthew Michel <[email protected]>
Signed-off-by: Aidan Belton <[email protected]>
Co-authored-by: AidanBeltonS <[email protected]>
Co-authored-by: aidan.belton-schure <[email protected]>
Co-authored-by: Dan Hoeflinger <[email protected]>
  • Loading branch information
4 people authored Jan 25, 2024
1 parent ee30b5c commit c5643ba
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 33 deletions.
46 changes: 26 additions & 20 deletions include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,33 +428,39 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __
using _IteratorValueType = typename ::std::iterator_traits<_Iterator>::value_type;
using _IndexValueType = ::std::make_unsigned_t<typename ::std::iterator_traits<_Iterator>::difference_type>;
using _ReduceValueType = tuple<_IndexValueType, _IteratorValueType>;
#ifdef _ONEDPL_DETECT_SPIRV_COMPILATION
using _Commutative = ::std::false_type;
// This operator doesn't track the lowest found index in case of equal min. or max. values. Thus, this operator is
// not commutative.
// Commutativity of the reduction operator depends on the compilation target (see __reduce_fn below);
// __spirv_target_conditional postpones deciding on commutativity to the device code where the
// target can be correctly tested.
using _Commutative = oneapi::dpl::__internal::__spirv_target_conditional</*_SpirvT*/ ::std::false_type,
/*_NonSpirvT*/ ::std::true_type>;
auto __reduce_fn = [__comp](_ReduceValueType __a, _ReduceValueType __b) {
using ::std::get;
if (__comp(get<1>(__b), get<1>(__a)))
// TODO: Consider removing the non-commutative operator for SPIR-V targets when we see improved performance with the
// non-sequential load path in transform_reduce.
if constexpr (oneapi::dpl::__internal::__is_spirv_target_v)
{
return __b;
// This operator doesn't track the lowest found index in case of equal min. or max. values. Thus, this operator is
// not commutative.
if (__comp(get<1>(__b), get<1>(__a)))
{
return __b;
}
return __a;
}
return __a;
};
#else
using _Commutative = ::std::true_type;
// This operator keeps track of the lowest found index in case of equal min. or max. values. Thus, this operator is
// commutative.
auto __reduce_fn = [__comp](_ReduceValueType __a, _ReduceValueType __b) {
bool _is_a_lt_b = __comp(get<1>(__a), get<1>(__b));
bool _is_b_lt_a = __comp(get<1>(__b), get<1>(__a));

if (_is_b_lt_a || (!_is_a_lt_b && get<0>(__b) < get<0>(__a)))
else
{
return __b;
// This operator keeps track of the lowest found index in case of equal min. or max. values. Thus, this operator is
// commutative.
bool _is_a_lt_b = __comp(get<1>(__a), get<1>(__b));
bool _is_b_lt_a = __comp(get<1>(__b), get<1>(__a));

if (_is_b_lt_a || (!_is_a_lt_b && get<0>(__b) < get<0>(__a)))
{
return __b;
}
return __a;
}
return __a;
};
#endif
auto __transform_fn = [](auto __gidx, auto __acc) { return _ReduceValueType{__gidx, __acc[__gidx]}; };

auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>();
Expand Down
8 changes: 5 additions & 3 deletions include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@
// TODO: determine which compiler configurations provide subgroup load/store
#define _ONEDPL_SYCL_SUB_GROUP_LOAD_STORE_PRESENT false

// Macro to check if we are compiling for Intel devices
// Macro to check if we are compiling for SPIR-V devices. This macro must only be used within
// SYCL kernels for determining SPIR-V compilation. Using this macro on the host may lead to incorrect behavior.
#ifndef _ONEDPL_DETECT_SPIRV_COMPILATION // Check if overridden for testing
# if (defined(__SPIR__) || defined(__SPIRV__)) && defined(__SYCL_DEVICE_ONLY__)
# define _ONEDPL_DETECT_SPIRV_COMPILATION 1
Expand All @@ -72,8 +73,9 @@
# define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) intel::reqd_sub_group_size(SIZE)
#endif

// Only require a subgroup size if we are compiling to SPIRV. Otherwise, an empty
// attribute will be provided.
// This macro is intended to be used for specifying a subgroup size as a SYCL kernel attribute for SPIR-V targets
// only. For non-SPIR-V targets, it will be empty. This macro should only be used in device code and may lead
// to incorrect behavior if used on the host.
#if _ONEDPL_DETECT_SPIRV_COMPILATION
# define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED(SIZE) _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE)
#else
Expand Down
20 changes: 11 additions & 9 deletions include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,24 +259,27 @@ struct transform_reduce
__local_mem[__local_idx] = __res;
}
}

// For non-SPIR-V targets, we check if the operator is commutative before selecting the appropriate codepath.
// On SPIR-V targets, the sequential implementation with the non-commutative operator is currently more performant.
static constexpr bool __use_nonseq_impl = !oneapi::dpl::__internal::__is_spirv_target_v && _Commutative::value;

template <typename _NDItemId, typename _Size, typename _AccLocal, typename... _Acc>
inline void
operator()(const _NDItemId& __item_id, const _Size& __n, const _Size& __global_offset, const _AccLocal& __local_mem,
const _Acc&... __acc) const
{
#ifndef _ONEDPL_DETECT_SPIRV_COMPILATION
if constexpr (_Commutative::value)
if constexpr (__use_nonseq_impl)
return nonseq_impl(__item_id, __n, __global_offset, __local_mem, __acc...);
#endif // _ONEDPL_DETECT_SPIRV_COMPILATION
return seq_impl(__item_id, __n, __global_offset, __local_mem, __acc...);
else
return seq_impl(__item_id, __n, __global_offset, __local_mem, __acc...);
}

template <typename _Size>
_Size
output_size(const _Size& __n, const ::std::uint16_t& __work_group_size) const
{
#ifndef _ONEDPL_DETECT_SPIRV_COMPILATION
if constexpr (_Commutative::value)
if constexpr (__use_nonseq_impl)
{
_Size __items_per_work_group = __work_group_size * __iters_per_work_item;
_Size __full_group_contrib = (__n / __items_per_work_group) * __work_group_size;
Expand All @@ -285,9 +288,8 @@ struct transform_reduce
_Size __last_wg_contrib = ::std::min(__last_wg_remainder, static_cast<_Size>(__work_group_size));
return __full_group_contrib + __last_wg_contrib;
}
#endif // _ONEDPL_DETECT_SPIRV_COMPILATION

return oneapi::dpl::__internal::__dpl_ceiling_div(__n, __iters_per_work_item);
else
return oneapi::dpl::__internal::__dpl_ceiling_div(__n, __iters_per_work_item);
}
};

Expand Down
15 changes: 15 additions & 0 deletions include/oneapi/dpl/pstl/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,21 @@ __iterators_possibly_equal(_Iterator1 __it1, _Iterator2 __it2)
}
}

// Conditionally sets type to _SpirvT if oneDPL is being compiled to a SPIR-V target with the SYCL backend and _NonSpirvT otherwise.
template <typename _SpirvT, typename _NonSpirvT>
struct __spirv_target_conditional :
#if _ONEDPL_DETECT_SPIRV_COMPILATION
_SpirvT
#else
_NonSpirvT
#endif
{
};

// Trait that has a true value if _ONEDPL_DETECT_SPIRV_COMPILATION is set and false otherwise. This may be used within kernels
// to determine SPIR-V targets.
inline constexpr bool __is_spirv_target_v = __spirv_target_conditional<::std::true_type, ::std::false_type>::value;

} // namespace __internal
} // namespace dpl
} // namespace oneapi
Expand Down
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ macro(onedpl_construct_exec test_source_file _test_name switch_off_checked_itera
target_compile_definitions(${_test_name} PRIVATE _PSTL_TEST_SUCCESSFUL_KEYWORD=1)

if (NOT ${custom_define} STREQUAL "")
target_compile_definitions(${_test_name} PRIVATE custom_define)
target_compile_definitions(${_test_name} PRIVATE ${custom_define})
endif()
if (MSVC)
target_compile_options(${_test_name} PRIVATE /bigobj)
Expand Down

0 comments on commit c5643ba

Please sign in to comment.