From c1419dafd07cfb923219c3c2d7dba77a469c1cdb Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 18 Jul 2023 12:52:24 -0700 Subject: [PATCH] Add avx512_argselect --- src/avx512-64bit-argsort.hpp | 102 ++++++++++++ src/avx512-common-argsort.h | 5 + tests/test-argselect.hpp | 265 ++++++++++++++++++++++++++++++ tests/test-argsort-common.h | 27 +++ tests/test-argsort.cpp | 308 +---------------------------------- tests/test-argsort.hpp | 273 +++++++++++++++++++++++++++++++ 6 files changed, 678 insertions(+), 302 deletions(-) create mode 100644 tests/test-argselect.hpp create mode 100644 tests/test-argsort-common.h create mode 100644 tests/test-argsort.hpp diff --git a/src/avx512-64bit-argsort.hpp b/src/avx512-64bit-argsort.hpp index 80c6ce4a..bf7d8ec6 100644 --- a/src/avx512-64bit-argsort.hpp +++ b/src/avx512-64bit-argsort.hpp @@ -283,6 +283,39 @@ inline void argsort_64bit_(type_t *arr, argsort_64bit_(arr, arg, pivot_index, right, max_iters - 1); } +template +static void argselect_64bit_(type_t *arr, + int64_t *arg, + int64_t pos, + int64_t left, + int64_t right, + int64_t max_iters) +{ + /* + * Resort to std::sort if quicksort isnt making any progress + */ + if (max_iters <= 0) { + std_argsort(arr, arg, left, right + 1); + return; + } + /* + * Base case: use bitonic networks to sort arrays <= 64 + */ + if (right + 1 - left <= 64) { + argsort_64_64bit(arr, arg + left, (int32_t)(right + 1 - left)); + return; + } + type_t pivot = get_pivot_64bit(arr, arg, left, right); + type_t smallest = vtype::type_max(); + type_t biggest = vtype::type_min(); + int64_t pivot_index = partition_avx512_unrolled( + arr, arg, left, right + 1, pivot, &smallest, &biggest); + if ((pivot != smallest) && (pos < pivot_index)) + argselect_64bit_(arr, arg, pos, left, pivot_index - 1, max_iters - 1); + else if ((pivot != biggest) && (pos >= pivot_index)) + argselect_64bit_(arr, arg, pos, pivot_index, right, max_iters - 1); +} + template bool has_nan(type_t* arr, int64_t arrsize) { @@ -310,6 +343,8 @@ bool has_nan(type_t* arr, int64_t arrsize) return found_nan; } + +/* argsort methods for 32-bit and 64-bit dtypes */ template void avx512_argsort(T* arr, int64_t *arg, int64_t arrsize) { @@ -375,4 +410,71 @@ std::vector avx512_argsort(T* arr, int64_t arrsize) return indices; } +/* argselect methods for 32-bit and 64-bit dtypes */ +template +void avx512_argselect(T* arr, int64_t *arg, int64_t k, int64_t arrsize) +{ + if (arrsize > 1) { + argselect_64bit_>( + arr, arg, k, 0, arrsize - 1, 2 * (int64_t)log2(arrsize)); + } +} + +template <> +void avx512_argselect(double* arr, int64_t *arg, int64_t k, int64_t arrsize) +{ + if (arrsize > 1) { + if (has_nan>(arr, arrsize)) { + /* FIXME: no need to do a full argsort */ + std_argsort_withnan(arr, arg, 0, arrsize); + } + else { + argselect_64bit_>( + arr, arg, k, 0, arrsize - 1, 2 * (int64_t)log2(arrsize)); + } + } +} + +template <> +void avx512_argselect(int32_t* arr, int64_t *arg, int64_t k, int64_t arrsize) +{ + if (arrsize > 1) { + argselect_64bit_>( + arr, arg, k, 0, arrsize - 1, 2 * (int64_t)log2(arrsize)); + } +} + +template <> +void avx512_argselect(uint32_t* arr, int64_t *arg, int64_t k, int64_t arrsize) +{ + if (arrsize > 1) { + argselect_64bit_>( + arr, arg, k, 0, arrsize - 1, 2 * (int64_t)log2(arrsize)); + } +} + +template <> +void avx512_argselect(float* arr, int64_t *arg, int64_t k, int64_t arrsize) +{ + if (arrsize > 1) { + if (has_nan>(arr, arrsize)) { + /* FIXME: no need to do a full argsort */ + std_argsort_withnan(arr, arg, 0, arrsize); + } + else { + argselect_64bit_>( + arr, arg, k, 0, arrsize - 1, 2 * (int64_t)log2(arrsize)); + } + } +} + +template +std::vector avx512_argselect(T* arr, int64_t k, int64_t arrsize) +{ + std::vector indices(arrsize); + std::iota(indices.begin(), indices.end(), 0); + avx512_argselect(arr, indices.data(), k, arrsize); + return indices; +} + #endif // AVX512_ARGSORT_64BIT diff --git a/src/avx512-common-argsort.h b/src/avx512-common-argsort.h index e0dcaccc..0ae50c49 100644 --- a/src/avx512-common-argsort.h +++ b/src/avx512-common-argsort.h @@ -21,6 +21,11 @@ void avx512_argsort(T *arr, int64_t *arg, int64_t arrsize); template std::vector avx512_argsort(T *arr, int64_t arrsize); +template +void avx512_argselect(T *arr, int64_t *arg, int64_t k, int64_t arrsize); + +template +std::vector avx512_argselect(T *arr, int64_t k, int64_t arrsize); /* * Parition one ZMM register based on the pivot and returns the index of the * last element that is less than equal to the pivot. diff --git a/tests/test-argselect.hpp b/tests/test-argselect.hpp new file mode 100644 index 00000000..33dee37b --- /dev/null +++ b/tests/test-argselect.hpp @@ -0,0 +1,265 @@ +/******************************************* + * * Copyright (C) 2023 Intel Corporation + * * SPDX-License-Identifier: BSD-3-Clause + * *******************************************/ + +template +class avx512argselect : public ::testing::Test { +}; +TYPED_TEST_SUITE_P(avx512argselect); + +TYPED_TEST_P(avx512argselect, test_random) +{ + if (cpu_has_avx512bw()) { + const int arrsize = 1024; + auto arr = get_uniform_rand_array(arrsize); + std::vector kth; + for (int64_t ii = 0; ii < arrsize; ++ii) { + kth.push_back(ii); + } + std::vector sorted_inx = std_argsort(arr); + for (auto &k : kth) { + std::vector inx + = avx512_argselect(arr.data(), k, arr.size()); + EXPECT_EQ(arr[sorted_inx[k]], arr[inx[k]]) << "Failed at index k = " << k; + EXPECT_UNIQUE(inx) + } + } + else { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } +} + +//TYPED_TEST_P(avx512argselect, test_constant) +//{ +// if (cpu_has_avx512bw()) { +// std::vector arrsizes; +// for (int64_t ii = 0; ii <= 1024; ++ii) { +// arrsizes.push_back(ii); +// } +// std::vector arr; +// for (auto &size : arrsizes) { +// /* constant array */ +// auto elem = get_uniform_rand_array(1)[0]; +// for (int64_t jj = 0; jj < size; ++jj) { +// arr.push_back(elem); +// } +// std::vector inx1 = std_argsort(arr); +// std::vector inx2 +// = avx512_argsort(arr.data(), arr.size()); +// std::vector sort1, sort2; +// for (size_t jj = 0; jj < size; ++jj) { +// sort1.push_back(arr[inx1[jj]]); +// sort2.push_back(arr[inx2[jj]]); +// } +// EXPECT_EQ(sort1, sort2) << "Array size =" << size; +// EXPECT_UNIQUE(inx2) +// arr.clear(); +// } +// } +// else { +// GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; +// } +//} +// +//TYPED_TEST_P(avx512argselect, test_small_range) +//{ +// if (cpu_has_avx512bw()) { +// std::vector arrsizes; +// for (int64_t ii = 0; ii <= 1024; ++ii) { +// arrsizes.push_back(ii); +// } +// std::vector arr; +// for (auto &size : arrsizes) { +// /* array with a smaller range of values */ +// arr = get_uniform_rand_array(size, 20, 1); +// std::vector inx1 = std_argsort(arr); +// std::vector inx2 +// = avx512_argsort(arr.data(), arr.size()); +// std::vector sort1, sort2; +// for (size_t jj = 0; jj < size; ++jj) { +// sort1.push_back(arr[inx1[jj]]); +// sort2.push_back(arr[inx2[jj]]); +// } +// EXPECT_EQ(sort1, sort2) << "Array size = " << size; +// EXPECT_UNIQUE(inx2) +// arr.clear(); +// } +// } +// else { +// GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; +// } +//} +// +//TYPED_TEST_P(avx512argselect, test_sorted) +//{ +// if (cpu_has_avx512bw()) { +// std::vector arrsizes; +// for (int64_t ii = 0; ii <= 1024; ++ii) { +// arrsizes.push_back(ii); +// } +// std::vector arr; +// for (auto &size : arrsizes) { +// arr = get_uniform_rand_array(size); +// std::sort(arr.begin(), arr.end()); +// std::vector inx1 = std_argsort(arr); +// std::vector inx2 +// = avx512_argsort(arr.data(), arr.size()); +// std::vector sort1, sort2; +// for (size_t jj = 0; jj < size; ++jj) { +// sort1.push_back(arr[inx1[jj]]); +// sort2.push_back(arr[inx2[jj]]); +// } +// EXPECT_EQ(sort1, sort2) << "Array size =" << size; +// EXPECT_UNIQUE(inx2) +// arr.clear(); +// } +// } +// else { +// GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; +// } +//} +// +//TYPED_TEST_P(avx512argselect, test_reverse) +//{ +// if (cpu_has_avx512bw()) { +// std::vector arrsizes; +// for (int64_t ii = 0; ii <= 1024; ++ii) { +// arrsizes.push_back(ii); +// } +// std::vector arr; +// for (auto &size : arrsizes) { +// arr = get_uniform_rand_array(size); +// std::sort(arr.begin(), arr.end()); +// std::reverse(arr.begin(), arr.end()); +// std::vector inx1 = std_argsort(arr); +// std::vector inx2 +// = avx512_argsort(arr.data(), arr.size()); +// std::vector sort1, sort2; +// for (size_t jj = 0; jj < size; ++jj) { +// sort1.push_back(arr[inx1[jj]]); +// sort2.push_back(arr[inx2[jj]]); +// } +// EXPECT_EQ(sort1, sort2) << "Array size =" << size; +// EXPECT_UNIQUE(inx2) +// arr.clear(); +// } +// } +// else { +// GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; +// } +//} +// +//TYPED_TEST_P(avx512argselect, test_array_with_nan) +//{ +// if (!cpu_has_avx512bw()) { +// GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; +// } +// if (!std::is_floating_point::value) { +// GTEST_SKIP() << "Skipping this test, it is meant for float/double"; +// } +// std::vector arrsizes; +// for (int64_t ii = 2; ii <= 1024; ++ii) { +// arrsizes.push_back(ii); +// } +// std::vector arr; +// for (auto &size : arrsizes) { +// arr = get_uniform_rand_array(size); +// arr[0] = std::numeric_limits::quiet_NaN(); +// arr[1] = std::numeric_limits::quiet_NaN(); +// std::vector inx +// = avx512_argsort(arr.data(), arr.size()); +// std::vector sort1; +// for (size_t jj = 0; jj < size; ++jj) { +// sort1.push_back(arr[inx[jj]]); +// } +// if ((!std::isnan(sort1[size - 1])) || (!std::isnan(sort1[size - 2]))) { +// FAIL() << "NAN's aren't sorted to the end"; +// } +// if (!std::is_sorted(sort1.begin(), sort1.end() - 2)) { +// FAIL() << "Array isn't sorted"; +// } +// EXPECT_UNIQUE(inx) +// arr.clear(); +// } +//} +// +//TYPED_TEST_P(avx512argselect, test_max_value_at_end_of_array) +//{ +// if (!cpu_has_avx512bw()) { +// GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; +// } +// std::vector arrsizes; +// for (int64_t ii = 1; ii <= 256; ++ii) { +// arrsizes.push_back(ii); +// } +// std::vector arr; +// for (auto &size : arrsizes) { +// arr = get_uniform_rand_array(size); +// if (std::numeric_limits::has_infinity) { +// arr[size - 1] = std::numeric_limits::infinity(); +// } +// else { +// arr[size - 1] = std::numeric_limits::max(); +// } +// std::vector inx = avx512_argsort(arr.data(), arr.size()); +// std::vector sorted; +// for (size_t jj = 0; jj < size; ++jj) { +// sorted.push_back(arr[inx[jj]]); +// } +// if (!std::is_sorted(sorted.begin(), sorted.end())) { +// EXPECT_TRUE(false) << "Array of size " << size << "is not sorted"; +// } +// EXPECT_UNIQUE(inx) +// arr.clear(); +// } +//} +// +//TYPED_TEST_P(avx512argselect, test_all_inf_array) +//{ +// if (!cpu_has_avx512bw()) { +// GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; +// } +// std::vector arrsizes; +// for (int64_t ii = 1; ii <= 256; ++ii) { +// arrsizes.push_back(ii); +// } +// std::vector arr; +// for (auto &size : arrsizes) { +// arr = get_uniform_rand_array(size); +// if (std::numeric_limits::has_infinity) { +// for (int64_t jj = 1; jj <= size; ++jj) { +// if (rand() % 0x1) { +// arr.push_back(std::numeric_limits::infinity()); +// } +// } +// } +// else { +// for (int64_t jj = 1; jj <= size; ++jj) { +// if (rand() % 0x1) { +// arr.push_back(std::numeric_limits::max()); +// } +// } +// } +// std::vector inx = avx512_argsort(arr.data(), arr.size()); +// std::vector sorted; +// for (size_t jj = 0; jj < size; ++jj) { +// sorted.push_back(arr[inx[jj]]); +// } +// if (!std::is_sorted(sorted.begin(), sorted.end())) { +// EXPECT_TRUE(false) << "Array of size " << size << "is not sorted"; +// } +// EXPECT_UNIQUE(inx) +// arr.clear(); +// } +//} + +REGISTER_TYPED_TEST_SUITE_P(avx512argselect, + test_random); + //test_reverse, + //test_constant, + //test_sorted, + //test_small_range, + //test_all_inf_array, + //test_array_with_nan, + //test_max_value_at_end_of_array); diff --git a/tests/test-argsort-common.h b/tests/test-argsort-common.h new file mode 100644 index 00000000..a21110a5 --- /dev/null +++ b/tests/test-argsort-common.h @@ -0,0 +1,27 @@ +#include +#include +#include +#include "cpuinfo.h" +#include "rand_array.h" +#include "avx512-64bit-argsort.hpp" + +template +std::vector std_argsort(const std::vector &array) +{ + std::vector indices(array.size()); + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), + indices.end(), + [&array](int left, int right) -> bool { + // sort indices according to corresponding array sizeent + return array[left] < array[right]; + }); + + return indices; +} + +#define EXPECT_UNIQUE(sorted_arg) \ + std::sort(sorted_arg.begin(), sorted_arg.end()); \ + std::vector expected_arg(sorted_arg.size()); \ + std::iota(expected_arg.begin(), expected_arg.end(), 0); \ + EXPECT_EQ(sorted_arg, expected_arg) << "Indices aren't unique. Array size = " << sorted_arg.size(); diff --git a/tests/test-argsort.cpp b/tests/test-argsort.cpp index 8048d751..b4d2b9e2 100644 --- a/tests/test-argsort.cpp +++ b/tests/test-argsort.cpp @@ -1,305 +1,9 @@ -/******************************************* - * * Copyright (C) 2023 Intel Corporation - * * SPDX-License-Identifier: BSD-3-Clause - * *******************************************/ +#include "test-argsort-common.h" +#include "test-argselect.hpp" +#include "test-argsort.hpp" -#include "avx512-64bit-argsort.hpp" -#include "cpuinfo.h" -#include "rand_array.h" -#include -#include -#include - -template -class avx512argsort : public ::testing::Test { -}; -TYPED_TEST_SUITE_P(avx512argsort); - -template -std::vector std_argsort(const std::vector &array) -{ - std::vector indices(array.size()); - std::iota(indices.begin(), indices.end(), 0); - std::sort(indices.begin(), - indices.end(), - [&array](int left, int right) -> bool { - // sort indices according to corresponding array sizeent - return array[left] < array[right]; - }); - - return indices; -} - -#define EXPECT_UNIQUE(sorted_arg) \ - std::sort(sorted_arg.begin(), sorted_arg.end()); \ - std::vector expected_arg(sorted_arg.size()); \ - std::iota(expected_arg.begin(), expected_arg.end(), 0); \ - EXPECT_EQ(sorted_arg, expected_arg) << "Indices aren't unique. Array size = " << sorted_arg.size(); - -TYPED_TEST_P(avx512argsort, test_random) -{ - if (cpu_has_avx512bw()) { - std::vector arrsizes; - for (int64_t ii = 0; ii <= 1024; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - /* Random array */ - arr = get_uniform_rand_array(size); - std::vector inx1 = std_argsort(arr); - std::vector inx2 - = avx512_argsort(arr.data(), arr.size()); - std::vector sort1, sort2; - for (size_t jj = 0; jj < size; ++jj) { - sort1.push_back(arr[inx1[jj]]); - sort2.push_back(arr[inx2[jj]]); - } - EXPECT_EQ(sort1, sort2) << "Array size =" << size; - EXPECT_UNIQUE(inx2) - arr.clear(); - } - } - else { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } -} - -TYPED_TEST_P(avx512argsort, test_constant) -{ - if (cpu_has_avx512bw()) { - std::vector arrsizes; - for (int64_t ii = 0; ii <= 1024; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - /* constant array */ - auto elem = get_uniform_rand_array(1)[0]; - for (int64_t jj = 0; jj < size; ++jj) { - arr.push_back(elem); - } - std::vector inx1 = std_argsort(arr); - std::vector inx2 - = avx512_argsort(arr.data(), arr.size()); - std::vector sort1, sort2; - for (size_t jj = 0; jj < size; ++jj) { - sort1.push_back(arr[inx1[jj]]); - sort2.push_back(arr[inx2[jj]]); - } - EXPECT_EQ(sort1, sort2) << "Array size =" << size; - EXPECT_UNIQUE(inx2) - arr.clear(); - } - } - else { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } -} - -TYPED_TEST_P(avx512argsort, test_small_range) -{ - if (cpu_has_avx512bw()) { - std::vector arrsizes; - for (int64_t ii = 0; ii <= 1024; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - /* array with a smaller range of values */ - arr = get_uniform_rand_array(size, 20, 1); - std::vector inx1 = std_argsort(arr); - std::vector inx2 - = avx512_argsort(arr.data(), arr.size()); - std::vector sort1, sort2; - for (size_t jj = 0; jj < size; ++jj) { - sort1.push_back(arr[inx1[jj]]); - sort2.push_back(arr[inx2[jj]]); - } - EXPECT_EQ(sort1, sort2) << "Array size = " << size; - EXPECT_UNIQUE(inx2) - arr.clear(); - } - } - else { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } -} - -TYPED_TEST_P(avx512argsort, test_sorted) -{ - if (cpu_has_avx512bw()) { - std::vector arrsizes; - for (int64_t ii = 0; ii <= 1024; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - arr = get_uniform_rand_array(size); - std::sort(arr.begin(), arr.end()); - std::vector inx1 = std_argsort(arr); - std::vector inx2 - = avx512_argsort(arr.data(), arr.size()); - std::vector sort1, sort2; - for (size_t jj = 0; jj < size; ++jj) { - sort1.push_back(arr[inx1[jj]]); - sort2.push_back(arr[inx2[jj]]); - } - EXPECT_EQ(sort1, sort2) << "Array size =" << size; - EXPECT_UNIQUE(inx2) - arr.clear(); - } - } - else { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } -} - -TYPED_TEST_P(avx512argsort, test_reverse) -{ - if (cpu_has_avx512bw()) { - std::vector arrsizes; - for (int64_t ii = 0; ii <= 1024; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - arr = get_uniform_rand_array(size); - std::sort(arr.begin(), arr.end()); - std::reverse(arr.begin(), arr.end()); - std::vector inx1 = std_argsort(arr); - std::vector inx2 - = avx512_argsort(arr.data(), arr.size()); - std::vector sort1, sort2; - for (size_t jj = 0; jj < size; ++jj) { - sort1.push_back(arr[inx1[jj]]); - sort2.push_back(arr[inx2[jj]]); - } - EXPECT_EQ(sort1, sort2) << "Array size =" << size; - EXPECT_UNIQUE(inx2) - arr.clear(); - } - } - else { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } -} - -TYPED_TEST_P(avx512argsort, test_array_with_nan) -{ - if (!cpu_has_avx512bw()) { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } - if (!std::is_floating_point::value) { - GTEST_SKIP() << "Skipping this test, it is meant for float/double"; - } - std::vector arrsizes; - for (int64_t ii = 2; ii <= 1024; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - arr = get_uniform_rand_array(size); - arr[0] = std::numeric_limits::quiet_NaN(); - arr[1] = std::numeric_limits::quiet_NaN(); - std::vector inx - = avx512_argsort(arr.data(), arr.size()); - std::vector sort1; - for (size_t jj = 0; jj < size; ++jj) { - sort1.push_back(arr[inx[jj]]); - } - if ((!std::isnan(sort1[size - 1])) || (!std::isnan(sort1[size - 2]))) { - FAIL() << "NAN's aren't sorted to the end"; - } - if (!std::is_sorted(sort1.begin(), sort1.end() - 2)) { - FAIL() << "Array isn't sorted"; - } - EXPECT_UNIQUE(inx) - arr.clear(); - } -} - -TYPED_TEST_P(avx512argsort, test_max_value_at_end_of_array) -{ - if (!cpu_has_avx512bw()) { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } - std::vector arrsizes; - for (int64_t ii = 1; ii <= 256; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - arr = get_uniform_rand_array(size); - if (std::numeric_limits::has_infinity) { - arr[size - 1] = std::numeric_limits::infinity(); - } - else { - arr[size - 1] = std::numeric_limits::max(); - } - std::vector inx = avx512_argsort(arr.data(), arr.size()); - std::vector sorted; - for (size_t jj = 0; jj < size; ++jj) { - sorted.push_back(arr[inx[jj]]); - } - if (!std::is_sorted(sorted.begin(), sorted.end())) { - EXPECT_TRUE(false) << "Array of size " << size << "is not sorted"; - } - EXPECT_UNIQUE(inx) - arr.clear(); - } -} - -TYPED_TEST_P(avx512argsort, test_all_inf_array) -{ - if (!cpu_has_avx512bw()) { - GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; - } - std::vector arrsizes; - for (int64_t ii = 1; ii <= 256; ++ii) { - arrsizes.push_back(ii); - } - std::vector arr; - for (auto &size : arrsizes) { - arr = get_uniform_rand_array(size); - if (std::numeric_limits::has_infinity) { - for (int64_t jj = 1; jj <= size; ++jj) { - if (rand() % 0x1) { - arr.push_back(std::numeric_limits::infinity()); - } - } - } - else { - for (int64_t jj = 1; jj <= size; ++jj) { - if (rand() % 0x1) { - arr.push_back(std::numeric_limits::max()); - } - } - } - std::vector inx = avx512_argsort(arr.data(), arr.size()); - std::vector sorted; - for (size_t jj = 0; jj < size; ++jj) { - sorted.push_back(arr[inx[jj]]); - } - if (!std::is_sorted(sorted.begin(), sorted.end())) { - EXPECT_TRUE(false) << "Array of size " << size << "is not sorted"; - } - EXPECT_UNIQUE(inx) - arr.clear(); - } -} - -REGISTER_TYPED_TEST_SUITE_P(avx512argsort, - test_random, - test_reverse, - test_constant, - test_sorted, - test_small_range, - test_all_inf_array, - test_array_with_nan, - test_max_value_at_end_of_array); - -using ArgSortTestTypes +using ArgTestTypes = testing::Types; -INSTANTIATE_TYPED_TEST_SUITE_P(T, avx512argsort, ArgSortTestTypes); +INSTANTIATE_TYPED_TEST_SUITE_P(T, avx512argsort, ArgTestTypes); +INSTANTIATE_TYPED_TEST_SUITE_P(T, avx512argselect, ArgTestTypes); diff --git a/tests/test-argsort.hpp b/tests/test-argsort.hpp new file mode 100644 index 00000000..5ef9e6ea --- /dev/null +++ b/tests/test-argsort.hpp @@ -0,0 +1,273 @@ +/******************************************* + * * Copyright (C) 2023 Intel Corporation + * * SPDX-License-Identifier: BSD-3-Clause + * *******************************************/ + +template +class avx512argsort : public ::testing::Test { +}; +TYPED_TEST_SUITE_P(avx512argsort); + +TYPED_TEST_P(avx512argsort, test_random) +{ + if (cpu_has_avx512bw()) { + std::vector arrsizes; + for (int64_t ii = 0; ii <= 1024; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + /* Random array */ + arr = get_uniform_rand_array(size); + std::vector inx1 = std_argsort(arr); + std::vector inx2 + = avx512_argsort(arr.data(), arr.size()); + std::vector sort1, sort2; + for (size_t jj = 0; jj < size; ++jj) { + sort1.push_back(arr[inx1[jj]]); + sort2.push_back(arr[inx2[jj]]); + } + EXPECT_EQ(sort1, sort2) << "Array size =" << size; + EXPECT_UNIQUE(inx2) + arr.clear(); + } + } + else { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } +} + +TYPED_TEST_P(avx512argsort, test_constant) +{ + if (cpu_has_avx512bw()) { + std::vector arrsizes; + for (int64_t ii = 0; ii <= 1024; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + /* constant array */ + auto elem = get_uniform_rand_array(1)[0]; + for (int64_t jj = 0; jj < size; ++jj) { + arr.push_back(elem); + } + std::vector inx1 = std_argsort(arr); + std::vector inx2 + = avx512_argsort(arr.data(), arr.size()); + std::vector sort1, sort2; + for (size_t jj = 0; jj < size; ++jj) { + sort1.push_back(arr[inx1[jj]]); + sort2.push_back(arr[inx2[jj]]); + } + EXPECT_EQ(sort1, sort2) << "Array size =" << size; + EXPECT_UNIQUE(inx2) + arr.clear(); + } + } + else { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } +} + +TYPED_TEST_P(avx512argsort, test_small_range) +{ + if (cpu_has_avx512bw()) { + std::vector arrsizes; + for (int64_t ii = 0; ii <= 1024; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + /* array with a smaller range of values */ + arr = get_uniform_rand_array(size, 20, 1); + std::vector inx1 = std_argsort(arr); + std::vector inx2 + = avx512_argsort(arr.data(), arr.size()); + std::vector sort1, sort2; + for (size_t jj = 0; jj < size; ++jj) { + sort1.push_back(arr[inx1[jj]]); + sort2.push_back(arr[inx2[jj]]); + } + EXPECT_EQ(sort1, sort2) << "Array size = " << size; + EXPECT_UNIQUE(inx2) + arr.clear(); + } + } + else { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } +} + +TYPED_TEST_P(avx512argsort, test_sorted) +{ + if (cpu_has_avx512bw()) { + std::vector arrsizes; + for (int64_t ii = 0; ii <= 1024; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + arr = get_uniform_rand_array(size); + std::sort(arr.begin(), arr.end()); + std::vector inx1 = std_argsort(arr); + std::vector inx2 + = avx512_argsort(arr.data(), arr.size()); + std::vector sort1, sort2; + for (size_t jj = 0; jj < size; ++jj) { + sort1.push_back(arr[inx1[jj]]); + sort2.push_back(arr[inx2[jj]]); + } + EXPECT_EQ(sort1, sort2) << "Array size =" << size; + EXPECT_UNIQUE(inx2) + arr.clear(); + } + } + else { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } +} + +TYPED_TEST_P(avx512argsort, test_reverse) +{ + if (cpu_has_avx512bw()) { + std::vector arrsizes; + for (int64_t ii = 0; ii <= 1024; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + arr = get_uniform_rand_array(size); + std::sort(arr.begin(), arr.end()); + std::reverse(arr.begin(), arr.end()); + std::vector inx1 = std_argsort(arr); + std::vector inx2 + = avx512_argsort(arr.data(), arr.size()); + std::vector sort1, sort2; + for (size_t jj = 0; jj < size; ++jj) { + sort1.push_back(arr[inx1[jj]]); + sort2.push_back(arr[inx2[jj]]); + } + EXPECT_EQ(sort1, sort2) << "Array size =" << size; + EXPECT_UNIQUE(inx2) + arr.clear(); + } + } + else { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } +} + +TYPED_TEST_P(avx512argsort, test_array_with_nan) +{ + if (!cpu_has_avx512bw()) { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } + if (!std::is_floating_point::value) { + GTEST_SKIP() << "Skipping this test, it is meant for float/double"; + } + std::vector arrsizes; + for (int64_t ii = 2; ii <= 1024; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + arr = get_uniform_rand_array(size); + arr[0] = std::numeric_limits::quiet_NaN(); + arr[1] = std::numeric_limits::quiet_NaN(); + std::vector inx + = avx512_argsort(arr.data(), arr.size()); + std::vector sort1; + for (size_t jj = 0; jj < size; ++jj) { + sort1.push_back(arr[inx[jj]]); + } + if ((!std::isnan(sort1[size - 1])) || (!std::isnan(sort1[size - 2]))) { + FAIL() << "NAN's aren't sorted to the end"; + } + if (!std::is_sorted(sort1.begin(), sort1.end() - 2)) { + FAIL() << "Array isn't sorted"; + } + EXPECT_UNIQUE(inx) + arr.clear(); + } +} + +TYPED_TEST_P(avx512argsort, test_max_value_at_end_of_array) +{ + if (!cpu_has_avx512bw()) { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } + std::vector arrsizes; + for (int64_t ii = 1; ii <= 256; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + arr = get_uniform_rand_array(size); + if (std::numeric_limits::has_infinity) { + arr[size - 1] = std::numeric_limits::infinity(); + } + else { + arr[size - 1] = std::numeric_limits::max(); + } + std::vector inx = avx512_argsort(arr.data(), arr.size()); + std::vector sorted; + for (size_t jj = 0; jj < size; ++jj) { + sorted.push_back(arr[inx[jj]]); + } + if (!std::is_sorted(sorted.begin(), sorted.end())) { + EXPECT_TRUE(false) << "Array of size " << size << "is not sorted"; + } + EXPECT_UNIQUE(inx) + arr.clear(); + } +} + +TYPED_TEST_P(avx512argsort, test_all_inf_array) +{ + if (!cpu_has_avx512bw()) { + GTEST_SKIP() << "Skipping this test, it requires avx512bw ISA"; + } + std::vector arrsizes; + for (int64_t ii = 1; ii <= 256; ++ii) { + arrsizes.push_back(ii); + } + std::vector arr; + for (auto &size : arrsizes) { + arr = get_uniform_rand_array(size); + if (std::numeric_limits::has_infinity) { + for (int64_t jj = 1; jj <= size; ++jj) { + if (rand() % 0x1) { + arr.push_back(std::numeric_limits::infinity()); + } + } + } + else { + for (int64_t jj = 1; jj <= size; ++jj) { + if (rand() % 0x1) { + arr.push_back(std::numeric_limits::max()); + } + } + } + std::vector inx = avx512_argsort(arr.data(), arr.size()); + std::vector sorted; + for (size_t jj = 0; jj < size; ++jj) { + sorted.push_back(arr[inx[jj]]); + } + if (!std::is_sorted(sorted.begin(), sorted.end())) { + EXPECT_TRUE(false) << "Array of size " << size << "is not sorted"; + } + EXPECT_UNIQUE(inx) + arr.clear(); + } +} + +REGISTER_TYPED_TEST_SUITE_P(avx512argsort, + test_random, + test_reverse, + test_constant, + test_sorted, + test_small_range, + test_all_inf_array, + test_array_with_nan, + test_max_value_at_end_of_array); +