Skip to content

Commit

Permalink
@@@ fix an error in merge_sort
Browse files Browse the repository at this point in the history
Signed-off-by: Sergey Kopienko <[email protected]>
  • Loading branch information
SergeyKopienko committed Dec 22, 2024
1 parent 1afe819 commit 12fb908
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 157 deletions.
98 changes: 0 additions & 98 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,104 +130,6 @@ __find_start_point(const _Rng1& __rng1, const _Index __rng1_from, _Index __rng1_
return _split_point_t<_Index>{*__res, __index_sum - *__res + 1};
}

// Searching for an intersection of a merge matrix (n1, n2) diagonal with the Merge Path to define sub-ranges
// to serial merge, considering only a rectangular window of the matrix:
//     rng1[__rng1_from, __rng1_to) x rng2[__rng2_from, __rng2_to).
//
// A cell (i2, i1) of the merge matrix holds __comp(rng2[i2], rng1[i1]).
// For example, a merge matrix for [0,1,1,2,3] and [0,0,2,3] is shown below:
//         0   1   1   2   3
//       ----------------------
//       |--->
//     0 | 0 | 1   1   1   1
//       |   |
//     0 | 0 | 1   1   1   1
//       |   -------------->
//     2 | 0   0   0   0 | 1
//       |               ---->
//     3 | 0   0   0   0   0 |
//
// Parameters:
//     __rng1, __rng2            - the two input sequences (indexed with operator[]);
//     __rng1_from, __rng1_to    - half-open window [from, to) of indexes allowed in __rng1;
//     __rng2_from, __rng2_to    - half-open window [from, to) of indexes allowed in __rng2;
//     __i_elem                  - the diagonal number; compared pairs satisfy i1 + i2 == __i_elem - 1;
//     __comp                    - comparator, called as __comp(__rng2[i2], __rng1[i1]).
//
// Returns _split_point_t<_Index>{i1, __index_sum - i1 + 1}, where __index_sum == __i_elem - 1 and i1 is the
// first index of the searched diagonal segment for which __comp(__rng2[__index_sum - i1], __rng1[i1]) is true
// (or the end of that segment if there is no such index).
template <typename _Rng1, typename _Rng2, typename _Index, typename _Compare>
_split_point_t<_Index>
__find_start_point_in(const _Rng1& __rng1, const _Index __rng1_from, _Index __rng1_to, const _Rng2& __rng2,
                      const _Index __rng2_from, _Index __rng2_to, const _Index __i_elem, _Compare __comp)
{
    // ----------------------- EXAMPLE ------------------------
    // Let's consider the following input data:
    //    rng1.size() = 10
    //    rng2.size() = 6
    //    i_diag = 9                 -> __index_sum = 8
    // Let's define the following ranges for processing:
    //    rng1: [3, ..., 9) -> __rng1_from = 3, __rng1_to = 9
    //    rng2: [1, ..., 4) -> __rng2_from = 1, __rng2_to = 4
    //
    // The goal: process only the items of the diagonal which lie inside
    //           the intersection of rng1[3, ..., 9) and rng2[1, ..., 4)
    //
    // Step 1: the rng1 window [3, 9) maps onto rng2 indexes [8 - 8, 8 - 3 + 1) = [0, 6) along the diagonal.
    // Step 2: [0, 6) sticks out of the rng2 window [1, 4) by 1 at the beginning and by 2 at the end.
    // Step 3: moving along the diagonal, a smaller rng2 index means a bigger rng1 index, so
    //             - the overhang at the beginning of rng2 (1) is cut from the END   of rng1: 9 - 1 = 8
    //             - the overhang at the end       of rng2 (2) is cut from the BEGIN of rng1: 3 + 2 = 5
    // Step 4: run lower_bound on rng1 indexes [from = 5, to = 8)
    //
    // We have three points on the diagonal for call comparison:
    //      X'0 : call __comp(rng2[3], rng1[5])      // 5 + 3 == 9 - 1 == 8
    //      X'1 : call __comp(rng2[2], rng1[6])      // 6 + 2 == 9 - 1 == 8
    //      X'2 : call __comp(rng2[1], rng1[7])      // 7 + 1 == 9 - 1 == 8
    //  - where for every comparing pair idx(rng1) + idx(rng2) == i_diag - 1
    // --------------------------------------------------------

    ////////////////////////////////////////////////////////////////////////////////////
    // Taking into account the specified constraints of the range of processed data
    const auto __index_sum = __i_elem - 1;

    // Signed arithmetic is required: the rng2 indexes derived from the diagonal may be negative
    using _IndexSigned = std::make_signed_t<_Index>;

    const _IndexSigned __rng2_from_signed = static_cast<_IndexSigned>(__rng2_from);
    const _IndexSigned __rng2_to_signed = static_cast<_IndexSigned>(__rng2_to);

    // rng2 indexes [__idx2_from, __idx2_to) which the diagonal visits while rng1 index runs over
    // [__rng1_from, __rng1_to)
    const _IndexSigned __idx2_from = __index_sum - (__rng1_to - 1);
    const _IndexSigned __idx2_to = __index_sum - __rng1_from + 1;

    // How far these rng2 indexes stick out of the allowed window [__rng2_from, __rng2_to)
    const _IndexSigned __idx2_from_diff = __idx2_from < __rng2_from_signed ? __rng2_from_signed - __idx2_from : 0;
    const _IndexSigned __idx2_to_diff = __idx2_to > __rng2_to_signed ? __idx2_to - __rng2_to_signed : 0;

    // Shrink the rng1 window by the opposite overhang (indexes on the diagonal run in opposite directions)
    // NOTE(review): if the diagonal does not cross the window at all, __idx1_from may exceed __idx1_to and
    // the search range below is not valid; presumably callers guarantee an intersection - TODO confirm.
    const _IndexSigned __idx1_from = static_cast<_IndexSigned>(__rng1_from) + __idx2_to_diff;
    const _IndexSigned __idx1_to = static_cast<_IndexSigned>(__rng1_to) - __idx2_from_diff;

    ////////////////////////////////////////////////////////////////////////////////////
    // Run search of split point on diagonal

    using __it_t = oneapi::dpl::counting_iterator<_Index>;

    __it_t __diag_it_begin(static_cast<_Index>(__idx1_from));
    __it_t __diag_it_end(static_cast<_Index>(__idx1_to));

    // Cells on the diagonal are 0 up to the Merge Path and 1 after it: search for the first 1
    constexpr int __k_value = 1;
    const __it_t __res = std::lower_bound(__diag_it_begin, __diag_it_end, __k_value,
                                          [&__rng1, &__rng2, __index_sum, __comp](_Index __idx, const auto&) {
                                              const auto __zero_or_one =
                                                  __comp(__rng2[__index_sum - __idx], __rng1[__idx]);
                                              return __zero_or_one < __k_value;
                                          });

    return _split_point_t<_Index>{*__res, __index_sum - *__res + 1};
}

// Serially merge the data from rng1 (starting from start1) and rng2 (starting from start2), writing the result
// to rng3 (starting from start3) in 'chunk' steps, without exceeding the total size of the sequences (n1 and n2)
template <typename _Rng1, typename _Rng2, typename _Rng3, typename _Index, typename _Compare>
Expand Down
Loading

0 comments on commit 12fb908

Please sign in to comment.