From ecad1fa5fcef3e52cb85470ceaadf95a2cb40051 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Fri, 28 Jul 2023 16:34:07 +0100 Subject: [PATCH 1/3] Fix nrm2 --- src/interface/blas1_interface.hpp | 4 +-- test/unittest/CMakeLists.txt | 2 +- test/unittest/blas1/blas1_nrm2_test.cpp | 38 ++++++++++++++++++------- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/interface/blas1_interface.hpp b/src/interface/blas1_interface.hpp index 866588d68..d4393159e 100644 --- a/src/interface/blas1_interface.hpp +++ b/src/interface/blas1_interface.hpp @@ -332,10 +332,10 @@ typename sb_handle_t::event_t _nrm2( const auto nWG = 2 * localSize; auto assignOp = make_assign_reduction(rs, prdOp, localSize, localSize * nWG); - auto ret0 = sb_handle.execute(assignOp); + auto ret0 = sb_handle.execute(assignOp, _dependencies); auto sqrtOp = make_op(rs); auto assignOpFinal = make_op(rs, sqrtOp); - auto ret1 = sb_handle.execute(assignOpFinal, _dependencies); + auto ret1 = sb_handle.execute(assignOpFinal, ret0); return blas::concatenate_vectors(ret0, ret1); } diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index f76b6218c..c4b860432 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -39,6 +39,7 @@ set(SYCL_UNITTEST_SRCS ${SYCLBLAS_UNITTEST}/blas1/blas1_rotmg_test.cpp ${SYCLBLAS_UNITTEST}/blas1/blas1_rotg_test.cpp ${SYCLBLAS_UNITTEST}/blas1/blas1_sdsdot_test.cpp + ${SYCLBLAS_UNITTEST}/blas1/blas1_nrm2_test.cpp # # Blas 2 tests ${SYCLBLAS_UNITTEST}/blas2/blas2_gbmv_test.cpp ${SYCLBLAS_UNITTEST}/blas2/blas2_gemv_test.cpp @@ -72,7 +73,6 @@ if(is_computecpp) # Blas 1 tests ${SYCLBLAS_UNITTEST}/blas1/blas1_swap_test.cpp ${SYCLBLAS_UNITTEST}/blas1/blas1_dot_test.cpp - ${SYCLBLAS_UNITTEST}/blas1/blas1_nrm2_test.cpp ${SYCLBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp ${SYCLBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp # Blas 2 tests diff --git a/test/unittest/blas1/blas1_nrm2_test.cpp b/test/unittest/blas1/blas1_nrm2_test.cpp index 4efd61725..2e63aba9a 100644 --- a/test/unittest/blas1/blas1_nrm2_test.cpp +++ b/test/unittest/blas1/blas1_nrm2_test.cpp @@ -26,7 +26,7 @@ #include "blas_test.hpp" template -using combination_t = std::tuple; +using combination_t = std::tuple; template void run_test(const combination_t combi) { @@ -34,27 +34,39 @@ void run_test(const combination_t combi) { api_type api; index_t size; index_t incX; - std::tie(alloc, api, size, incX) = combi; + scalar_t unused; + std::tie(alloc, api, size, incX, unused) = combi; + auto vector_size = size * std::abs(incX); // Input vectors - std::vector x_v(size * incX); + std::vector x_v(vector_size); fill_random(x_v); // Output scalar scalar_t out_s = 10.0; + scalar_t out_cpu_s = 20.0; // Reference implementation - auto out_cpu_s = reference_blas::nrm2(size, x_v.data(), incX); + if (incX < 0) { + // Some reference implementations of BLAS do not support negative + // increments for nrm2. To simulate what is specified in the + // oneAPI spec, invert the vector and use a positive increment. + std::vector x_v_inv(vector_size); + std::reverse_copy(x_v.begin(), x_v.end() + (incX + 1), x_v_inv.begin()); + out_cpu_s = reference_blas::nrm2(size, x_v_inv.data(), -incX); + } else { + out_cpu_s = reference_blas::nrm2(size, x_v.data(), incX); + } // SYCL implementation auto q = make_queue(); blas::SB_Handle sb_handle(q); // Iterators - auto gpu_x_v = blas::helper::allocate(size * incX, q); + auto gpu_x_v = blas::helper::allocate(vector_size, q); auto copy_x = - blas::helper::copy_to_device(q, x_v.data(), gpu_x_v, size * incX); + blas::helper::copy_to_device(q, x_v.data(), gpu_x_v, vector_size); if (api == api_type::async) { auto gpu_out_s = blas::helper::allocate(1, q); @@ -71,6 +83,8 @@ void run_test(const combination_t combi) { out_s = _nrm2(sb_handle, size, gpu_x_v, incX, {copy_x}); } + std::cout << "SYCL " << out_s << " vs ref " << out_cpu_s << std::endl; + // Validate the result const bool isAlmostEqual = utils::almost_equal(out_s, out_cpu_s); ASSERT_TRUE(isAlmostEqual); @@ -84,7 +98,8 @@ void run_test(const combination_t combi) { api_type api; index_t size; index_t incX; - std::tie(alloc, api, size, incX) = combi; + scalar_t unused; + std::tie(alloc, api, size, incX, unused) = combi; if (alloc == "usm") { // usm alloc #ifdef SB_ENABLE_USM @@ -98,12 +113,12 @@ void run_test(const combination_t combi) { } template const auto combi = - ::testing::Combine(::testing::Values("usm", "buf"), // allocation type + ::testing::Combine(::testing::Values("usm", "buf"), // allocation type ::testing::Values(api_type::async, api_type::sync), // Api ::testing::Values(11, 1002), // size - ::testing::Values(1, 4) // incX - ); + ::testing::Values(1, 4, -3), // incX + ::testing::Values(scalar_t{1})); template static std::string generate_name( @@ -111,7 +126,8 @@ static std::string generate_name( std::string alloc; api_type api; int size, incX; - BLAS_GENERATE_NAME(info.param, alloc, api, size, incX); + T unused; + BLAS_GENERATE_NAME(info.param, alloc, api, size, incX, unused); } BLAS_REGISTER_TEST_ALL(Nrm2, combination_t, combi, generate_name); From 114e6d3b4c84d662071b4135aee92cc8dbabfff8 Mon Sep 17 00:00:00 2001 From: Alejandro Acosta Date: Wed, 2 Aug 2023 09:17:46 +0100 Subject: [PATCH 2/3] Remove cout --- test/unittest/blas1/blas1_nrm2_test.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/unittest/blas1/blas1_nrm2_test.cpp b/test/unittest/blas1/blas1_nrm2_test.cpp index 2e63aba9a..383fab54c 100644 --- a/test/unittest/blas1/blas1_nrm2_test.cpp +++ b/test/unittest/blas1/blas1_nrm2_test.cpp @@ -83,8 +83,6 @@ void run_test(const combination_t combi) { out_s = _nrm2(sb_handle, size, gpu_x_v, incX, {copy_x}); } - std::cout << "SYCL " << out_s << " vs ref " << out_cpu_s << std::endl; - // Validate the result const bool isAlmostEqual = utils::almost_equal(out_s, out_cpu_s); ASSERT_TRUE(isAlmostEqual); From 8113d4dcfab27003f8c92826d653616b9a4da9ad Mon Sep 17 00:00:00 2001 From: aacostadiaz <127198532+aacostadiaz@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:01:18 +0100 Subject: [PATCH 3/3] Fixes issue with dependencies in TBMV (#454) This PR fixes an issue with dependencies in TBMV (similar to the fix in #451) --- src/interface/blas2_interface.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interface/blas2_interface.hpp b/src/interface/blas2_interface.hpp index 21dae4b34..acd7fe736 100644 --- a/src/interface/blas2_interface.hpp +++ b/src/interface/blas2_interface.hpp @@ -661,8 +661,8 @@ typename sb_handle_t::event_t _tbmv_impl( global_size, _dependencies); auto assignOp = make_op(vx, vres); - auto ret = concatenate_vectors( - tbmvEvent, sb_handle.execute(assignOp, local_range, _dependencies)); + auto assignEvent = sb_handle.execute(assignOp, local_range, tbmvEvent); + auto ret = concatenate_vectors(tbmvEvent, assignEvent); blas::helper::enqueue_deallocate(ret, res_buffer, sb_handle.get_queue());