Skip to content

Commit

Permalink
Fix negative increment
Browse files (browse the repository at this point in the history)
  • Loading branch information
aacostadiaz committed Jul 28, 2023
1 parent 8d5f6e6 commit 8154920
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 26 deletions.
8 changes: 6 additions & 2 deletions include/views/view.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,17 @@ struct VectorView {
// Returns a reference to the element at logical index i of this view.
// The overload is enabled only when use_as_ptr is false (std::enable_if).
//   strd_ == 1 : element at data_ + i
//   strd_ >  0 : element at data_ + i * strd_
//   otherwise  : element at data_ + size_ * (-strd_) + (i + 1) * strd_,
//                which for a negative strd_ equals
//                data_ + (size_ - 1 - i) * |strd_|, so increasing i walks
//                the storage backwards.
// NOTE(review): this excerpt is a diff rendering without +/- markers, so two
// return statements appear back to back. The first looks like the pre-change
// line and the three-way conditional like its replacement; as literally
// written here the second return is unreachable. Confirm against the header.
template <bool use_as_ptr = false>
SYCL_BLAS_INLINE typename std::enable_if<!use_as_ptr, value_t &>::type eval(
index_t i) {
return (strd_ == 1) ? *(data_ + i) : *(data_ + i * strd_);
return (strd_ == 1) ? *(data_ + i)
: (strd_ > 0) ? *(data_ + i * strd_)
// Remaining case (strd_ <= 0): start from size_ * |strd_| and step back by
// (i + 1) strides. Assumes size_ is the logical element count - TODO confirm.
: *(data_ + (size_ * -strd_) + ((i + 1) * strd_));
}

// Const counterpart of eval(index_t): returns the element at logical index i
// by value. The overload is enabled only when use_as_ptr is false.
//   strd_ == 1 : element at data_ + i
//   strd_ >  0 : element at data_ + i * strd_
//   otherwise  : element at data_ + size_ * (-strd_) + (i + 1) * strd_,
//                i.e. data_ + (size_ - 1 - i) * |strd_| for a negative strd_.
// NOTE(review): diff rendering without +/- markers - the first return looks
// like the pre-change line and the three-way conditional like its
// replacement; as literally written the second return is unreachable.
// Confirm against the real header.
template <bool use_as_ptr = false>
SYCL_BLAS_INLINE typename std::enable_if<!use_as_ptr, value_t>::type eval(
index_t i) const {
return (strd_ == 1) ? *(data_ + i) : *(data_ + i * strd_);
return (strd_ == 1) ? *(data_ + i)
: (strd_ > 0) ? *(data_ + i * strd_)
// Remaining case (strd_ <= 0): start from size_ * |strd_| and step back by
// (i + 1) strides. Assumes size_ is the logical element count - TODO confirm.
: *(data_ + (size_ * -strd_) + ((i + 1) * strd_));
}

SYCL_BLAS_INLINE value_t &eval(cl::sycl::nd_item<1> ndItem) {
Expand Down
18 changes: 16 additions & 2 deletions test/blas_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,14 @@ template <class T, class... Args>
// Recursive step of the test-name builder: writes "__<token>_" followed by
// the dumped form of arg to ss, then recurses on the remaining args.
// strtok(nullptr, ", ") continues a tokenization started by an earlier
// strtok call, splitting on ',' and ' '.
inline void generate_name_helper(std::ostream &ss, T arg, Args... args) {
auto token = strtok(nullptr, ", ");
ss << "__" << token << "_";
// NOTE(review): diff rendering without +/- markers - this unconditional
// dump_arg looks like the pre-change line that the if-constexpr block below
// replaces; as literally written arg would be dumped twice. Confirm.
dump_arg(ss, arg);
// Arithmetic arguments: a negative value is written as "minus_" followed by
// its magnitude (presumably to keep '-' out of test names - confirm).
if constexpr (std::is_arithmetic<T>::value) {
if (arg < 0) {
ss << "minus_";
}
// NOTE(review): an explicit template argument on std::abs restricts overload
// resolution to function templates (e.g. the std::complex<T> one), not the
// plain int/long/float/double overloads; std::abs(arg) is probably what is
// intended - verify.
dump_arg(ss, std::abs<T>(arg));
} else {
dump_arg(ss, arg);
}
generate_name_helper(ss, args...);
}

Expand All @@ -325,7 +332,14 @@ inline std::string generate_name_helper(char *str_args, T arg, Args... args) {
std::stringstream ss;
auto token = strtok(str_args, ", ");
ss << token << "_";
dump_arg(ss, arg);
if constexpr (std::is_arithmetic<T>::value) {
if (arg < 0) {
ss << "minus_";
}
dump_arg(ss, std::abs<T>(arg));
} else {
dump_arg(ss, arg);
}
generate_name_helper(ss, args...);
return ss.str();
}
Expand Down
1 change: 1 addition & 0 deletions test/unittest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ include_directories(${SYCLBLAS_TEST} ${BLAS_INCLUDE_DIRS})
# compiling tests
set(SYCL_UNITTEST_SRCS
# Blas 1 tests
${SYCLBLAS_UNITTEST}/blas1/blas1_asum_test.cpp
${SYCLBLAS_UNITTEST}/blas1/blas1_axpy_test.cpp
${SYCLBLAS_UNITTEST}/blas1/blas1_copy_test.cpp
${SYCLBLAS_UNITTEST}/blas1/blas1_scal_test.cpp
Expand Down
38 changes: 27 additions & 11 deletions test/unittest/blas1/blas1_asum_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,21 @@
#include "blas_test.hpp"

// Parameter tuple describing one Asum test case. The run_test call sites in
// this file unpack it as (alloc, api, size, incX, unused).
// NOTE(review): diff rendering without +/- markers - the 4-element alias
// looks like the pre-change line and the 5-element alias (with a trailing
// scalar_t) like its replacement; only one of them can follow the template
// header in the real file.
template <typename scalar_t>
using combination_t = std::tuple<std::string, api_type, int, int>;
using combination_t = std::tuple<std::string, api_type, int, int, scalar_t>;

template <typename scalar_t, helper::AllocType mem_alloc>
void run_test(const combination_t<scalar_t> combi) {
std::string alloc;
api_type api;
index_t size;
index_t incX;
std::tie(alloc, api, size, incX) = combi;
scalar_t unused;
std::tie(alloc, api, size, incX, unused) = combi;

auto vector_size = size * std::abs(incX);

// Input vector
std::vector<scalar_t> x_v(size * incX);
std::vector<scalar_t> x_v(vector_size);
fill_random<scalar_t>(x_v);

// We need to guarantee that cl::sycl::half can hold the sum
Expand All @@ -47,18 +50,28 @@ void run_test(const combination_t<scalar_t> combi) {

// Output scalar
scalar_t out_s = 0;
scalar_t out_cpu_s;

// Reference implementation
scalar_t out_cpu_s = reference_blas::asum(size, x_v.data(), incX);
if (incX < 0) {
// Some reference implementations of BLAS do not support negative
// increments for asum. To simulate what is specified in the
// oneAPI spec, invert the vector and use a positive increment.
std::vector<scalar_t> x_v_inv(vector_size);
std::reverse_copy(x_v.begin(), x_v.end() + (incX + 1), x_v_inv.begin());
out_cpu_s = reference_blas::asum(size, x_v_inv.data(), -incX);
} else {
out_cpu_s = reference_blas::asum(size, x_v.data(), incX);
}

// SYCL implementation
auto q = make_queue();
blas::SB_Handle sb_handle(q);

// Iterators
auto gpu_x_v = helper::allocate<mem_alloc, scalar_t>(size * incX, q);
auto gpu_x_v = helper::allocate<mem_alloc, scalar_t>(vector_size, q);
auto copy_x =
helper::copy_to_device<scalar_t>(q, x_v.data(), gpu_x_v, size * incX);
helper::copy_to_device<scalar_t>(q, x_v.data(), gpu_x_v, vector_size);

if (api == api_type::async) {
auto gpu_out_s = helper::allocate<mem_alloc, scalar_t>(1, q);
Expand Down Expand Up @@ -87,7 +100,8 @@ static void run_test(const combination_t<scalar_t> combi) {
api_type api;
index_t size;
index_t incX;
std::tie(alloc, api, size, incX) = combi;
scalar_t unused;
std::tie(alloc, api, size, incX, unused) = combi;

if (alloc == "usm") {
#ifdef SB_ENABLE_USM
Expand All @@ -102,12 +116,13 @@ static void run_test(const combination_t<scalar_t> combi) {

// Cartesian product of the Asum test parameters, in the order
// (allocation type, api, size, incX, unused).
// NOTE(review): diff rendering without +/- markers - several rows appear
// twice (pre-change and post-change). The post-change rows appear to be the
// ones that add the negative increments -1 and -3 and the trailing "unused"
// value; confirm against the real file.
template <typename scalar_t>
const auto combi =
::testing::Combine(::testing::Values("usm", "buf"), // allocation type
::testing::Combine(::testing::Values("usm", "buf"), // allocation type
::testing::Values(api_type::async,
api_type::sync), // Api
::testing::Values(11, 65, 10000,
1002400), // size
::testing::Values(1, 4) // incX
1002400), // size
// Negative increments exercise the reversed-access path.
::testing::Values(1, 4, -1, -3), // incX
// Placeholder matching the scalar_t slot of combination_t; its value is
// read into a variable named "unused" by the visible call sites.
::testing::Values(0) // unused
);

template <class T>
Expand All @@ -116,7 +131,8 @@ static std::string generate_name(
std::string alloc;
api_type api;
int size, incX;
BLAS_GENERATE_NAME(info.param, alloc, api, size, incX);
T unused;
BLAS_GENERATE_NAME(info.param, alloc, api, size, incX, unused);
}

// Registers the Asum test using the combination_t tuple type, the combi
// parameter generator and the generate_name naming function.
// NOTE(review): the macro is defined outside this excerpt; presumably it
// instantiates the suite for every supported scalar type - confirm.
BLAS_REGISTER_TEST_ALL(Asum, combination_t, combi, generate_name);
24 changes: 13 additions & 11 deletions test/unittest/blas1/blas1_axpy_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,15 @@ void run_test(const combination_t<scalar_t> combi) {
index_t incY;
std::tie(alloc, size, alpha, incX, incY) = combi;

auto x_size = size * std::abs(incX);
auto y_size = size * std::abs(incY);
// Input vector
std::vector<scalar_t> x_v(size * incX);
std::vector<scalar_t> x_v(x_size);
fill_random(x_v);

// Output vector
std::vector<scalar_t> y_v(size * incY, 10.0);
std::vector<scalar_t> y_cpu_v(size * incY, 10.0);
std::vector<scalar_t> y_v(y_size, 10.0);
std::vector<scalar_t> y_cpu_v(y_size, 10.0);

// Reference implementation
reference_blas::axpy(size, alpha, x_v.data(), incX, y_cpu_v.data(), incY);
Expand All @@ -53,17 +55,17 @@ void run_test(const combination_t<scalar_t> combi) {
blas::SB_Handle sb_handle(q);

// Iterators
auto gpu_x_v = helper::allocate<mem_alloc, scalar_t>(size * incX, q);
auto gpu_y_v = helper::allocate<mem_alloc, scalar_t>(size * incY, q);
auto gpu_x_v = helper::allocate<mem_alloc, scalar_t>(x_size, q);
auto gpu_y_v = helper::allocate<mem_alloc, scalar_t>(y_size, q);

auto copy_x = helper::copy_to_device(q, x_v.data(), gpu_x_v, size * incX);
auto copy_y = helper::copy_to_device(q, y_v.data(), gpu_y_v, size * incY);
auto copy_x = helper::copy_to_device(q, x_v.data(), gpu_x_v, x_size);
auto copy_y = helper::copy_to_device(q, y_v.data(), gpu_y_v, y_size);

auto axpy_event = _axpy(sb_handle, size, alpha, gpu_x_v, incX, gpu_y_v, incY,
{copy_x, copy_y});
sb_handle.wait(axpy_event);

auto event = helper::copy_to_host(q, gpu_y_v, y_v.data(), size * incY);
auto event = helper::copy_to_host(q, gpu_y_v, y_v.data(), y_size);
sb_handle.wait(event);

// Validate the result
Expand Down Expand Up @@ -108,9 +110,9 @@ template <typename scalar_t>
// Cartesian product of the Axpy test parameters. run_test unpacks each tuple
// as (alloc, size, alpha, incX, incY).
// NOTE(review): diff rendering without +/- markers - the alpha/incX/incY rows
// appear twice. The second set (which adds negative increments) looks like
// the post-change version; confirm against the real file.
const auto combi =
::testing::Combine(::testing::Values("usm", "buf"), // allocation type
::testing::Values(11, 1002), // size
::testing::Values<scalar_t>(0.0, 1.5), // alpha
::testing::Values(1, 4), // incX
::testing::Values(1, 3) // incY
::testing::Values<scalar_t>(0.0, 1.0), // alpha
::testing::Values(1, 4, -1, -3), // incX
::testing::Values(1, 3, -2) // incY
);
#endif

Expand Down

0 comments on commit 8154920

Please sign in to comment.