From 864cb778bca89279d927ae8c71b969baaeb04535 Mon Sep 17 00:00:00 2001 From: "andrei.matraguna" Date: Wed, 28 Aug 2024 13:09:21 +0100 Subject: [PATCH] add check for test_matrix_a in gbmv,tbmv,hbmv and sbmv --- include/clblast.h | 10 +++--- src/clblast.cpp | 60 +++++++++++++++++------------------ src/clblast_c.cpp | 22 ++++++------- src/routines/level2/xgbmv.cpp | 7 ++-- src/routines/level2/xgbmv.hpp | 2 +- src/routines/level2/xgemv.cpp | 9 +++--- src/routines/level2/xgemv.hpp | 4 +-- src/routines/level2/xhbmv.cpp | 4 +-- src/routines/level2/xhbmv.hpp | 2 +- src/routines/level2/xsbmv.cpp | 4 +-- src/routines/level2/xsbmv.hpp | 2 +- src/routines/level2/xtbmv.cpp | 4 +-- src/routines/level2/xtbmv.hpp | 2 +- src/utilities/buffer_test.hpp | 10 ++++++ 14 files changed, 78 insertions(+), 64 deletions(-) diff --git a/include/clblast.h b/include/clblast.h index 0d3b9d9f..65c4e961 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -281,7 +281,7 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event = nullptr); + cl_command_queue* queue, cl_event* event = nullptr, const bool do_test_matrix_a=true); // General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV template @@ -292,7 +292,7 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event = nullptr); + cl_command_queue* queue, cl_event* event = nullptr, const bool do_test_matrix_a=true); // Hermitian matrix-vector multiplication: CHEMV/ZHEMV template @@ -314,7 +314,7 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem 
y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event = nullptr); + cl_command_queue* queue, cl_event* event = nullptr, const bool do_test_matrix_a=true); // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV template @@ -347,7 +347,7 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event = nullptr); + cl_command_queue* queue, cl_event* event = nullptr, const bool do_test_matrix_a=true); // Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV template @@ -374,7 +374,7 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event = nullptr); + cl_command_queue* queue, cl_event* event = nullptr, const bool do_test_matrix_a=true); // Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV template diff --git a/src/clblast.cpp b/src/clblast.cpp index 180693e7..8f17df4e 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -615,7 +615,7 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event,const bool do_test_matrix_a) { try { auto queue_cpp = Queue(*queue); auto routine = Xgemv(queue_cpp, event); @@ -625,7 +625,7 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, - Buffer(y_buffer), y_offset, y_inc); + Buffer(y_buffer), y_offset, y_inc, 
do_test_matrix_a); return StatusCode::kSuccess; } catch (...) { return DispatchException(); } } @@ -636,7 +636,7 @@ template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*,const bool); template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const double, @@ -644,7 +644,7 @@ template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*,const bool); template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const float2, @@ -652,7 +652,7 @@ template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*,const bool); template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const double2, @@ -660,7 +660,7 @@ template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*,const bool); template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const half, @@ -668,7 +668,7 @@ template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const half, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); // General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV template @@ -679,7 +679,7 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose, const cl_mem 
x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event,const bool do_test_matrix_a) { try { auto queue_cpp = Queue(*queue); auto routine = Xgbmv(queue_cpp, event); @@ -689,7 +689,7 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, - Buffer(y_buffer), y_offset, y_inc); + Buffer(y_buffer), y_offset, y_inc,do_test_matrix_a); return StatusCode::kSuccess; } catch (...) { return DispatchException(); } } @@ -700,7 +700,7 @@ template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const double, @@ -708,7 +708,7 @@ template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const float2, @@ -716,7 +716,7 @@ template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const double2, @@ -724,7 +724,7 @@ template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const double2, cl_mem, const 
size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const half, @@ -732,7 +732,7 @@ template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const cl_mem, const size_t, const size_t, const half, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); // Hermitian matrix-vector multiplication: CHEMV/ZHEMV template @@ -783,7 +783,7 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event, const bool do_test_matrix_a) { try { auto queue_cpp = Queue(*queue); auto routine = Xhbmv(queue_cpp, event); @@ -793,7 +793,7 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, - Buffer(y_buffer), y_offset, y_inc); + Buffer(y_buffer), y_offset, y_inc,do_test_matrix_a); return StatusCode::kSuccess; } catch (...) 
{ return DispatchException(); } } @@ -804,7 +804,7 @@ template StatusCode PUBLIC_API Hbmv(const Layout, const Triangle, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Hbmv(const Layout, const Triangle, const size_t, const size_t, const double2, @@ -812,7 +812,7 @@ template StatusCode PUBLIC_API Hbmv(const Layout, const Triangle, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV template @@ -911,7 +911,7 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event,const bool do_test_matrix_a) { try { auto queue_cpp = Queue(*queue); auto routine = Xsbmv(queue_cpp, event); @@ -921,7 +921,7 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, - Buffer(y_buffer), y_offset, y_inc); + Buffer(y_buffer), y_offset, y_inc,do_test_matrix_a); return StatusCode::kSuccess; } catch (...) 
{ return DispatchException(); } } @@ -932,7 +932,7 @@ template StatusCode PUBLIC_API Sbmv(const Layout, const Triangle, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Sbmv(const Layout, const Triangle, const size_t, const size_t, const double, @@ -940,7 +940,7 @@ template StatusCode PUBLIC_API Sbmv(const Layout, const Triangle, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Sbmv(const Layout, const Triangle, const size_t, const size_t, const half, @@ -948,7 +948,7 @@ template StatusCode PUBLIC_API Sbmv(const Layout, const Triangle, const cl_mem, const size_t, const size_t, const half, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); // Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV template @@ -1047,14 +1047,14 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event, const bool do_test_matrix_a) { try { auto queue_cpp = Queue(*queue); auto routine = Xtbmv(queue_cpp, event); routine.DoTbmv(layout, triangle, a_transpose, diagonal, n, k, Buffer(a_buffer), a_offset, a_ld, - Buffer(x_buffer), x_offset, x_inc); + Buffer(x_buffer), x_offset, x_inc,do_test_matrix_a); return StatusCode::kSuccess; } catch (...) 
{ return DispatchException(); } } @@ -1062,27 +1062,27 @@ template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const T const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*); + cl_command_queue*, cl_event*, const bool); // Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV template diff --git a/src/clblast_c.cpp b/src/clblast_c.cpp index a224230a..54c36490 100644 --- a/src/clblast_c.cpp +++ b/src/clblast_c.cpp @@ -1145,7 +1145,7 @@ CLBlastStatusCode CLBlastSgbmv(const CLBlastLayout layout, const CLBlastTranspos const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const float beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* 
event, const bool do_test_matrix_a) { try { return static_cast( clblast::Gbmv(static_cast(layout), @@ -1156,7 +1156,7 @@ CLBlastStatusCode CLBlastSgbmv(const CLBlastLayout layout, const CLBlastTranspos x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, - queue, event) + queue, event, do_test_matrix_a) ); } catch (...) { return static_cast(clblast::DispatchExceptionForC()); } } @@ -1167,7 +1167,7 @@ CLBlastStatusCode CLBlastDgbmv(const CLBlastLayout layout, const CLBlastTranspos const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event, const bool do_test_matrix_a) { try { return static_cast( clblast::Gbmv(static_cast(layout), @@ -1178,7 +1178,7 @@ CLBlastStatusCode CLBlastDgbmv(const CLBlastLayout layout, const CLBlastTranspos x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, - queue, event) + queue, event, do_test_matrix_a) ); } catch (...) { return static_cast(clblast::DispatchExceptionForC()); } } @@ -1189,7 +1189,7 @@ CLBlastStatusCode CLBlastCgbmv(const CLBlastLayout layout, const CLBlastTranspos const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_float2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event, const bool do_test_matrix_a) { try { return static_cast( clblast::Gbmv(static_cast(layout), @@ -1200,7 +1200,7 @@ CLBlastStatusCode CLBlastCgbmv(const CLBlastLayout layout, const CLBlastTranspos x_buffer, x_offset, x_inc, float2{beta.s[0], beta.s[1]}, y_buffer, y_offset, y_inc, - queue, event) + queue, event, do_test_matrix_a) ); } catch (...) 
{ return static_cast(clblast::DispatchExceptionForC()); } } @@ -1211,7 +1211,7 @@ CLBlastStatusCode CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspos const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event, const bool do_test_matrix_a) { try { return static_cast( clblast::Gbmv(static_cast(layout), @@ -1222,7 +1222,7 @@ CLBlastStatusCode CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspos x_buffer, x_offset, x_inc, double2{beta.s[0], beta.s[1]}, y_buffer, y_offset, y_inc, - queue, event) + queue, event, do_test_matrix_a) ); } catch (...) { return static_cast(clblast::DispatchExceptionForC()); } } @@ -1233,7 +1233,7 @@ CLBlastStatusCode CLBlastHgbmv(const CLBlastLayout layout, const CLBlastTranspos const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_half beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event) { + cl_command_queue* queue, cl_event* event, const bool do_test_matrix_a) { try { return static_cast( clblast::Gbmv(static_cast(layout), @@ -1243,8 +1243,8 @@ CLBlastStatusCode CLBlastHgbmv(const CLBlastLayout layout, const CLBlastTranspos a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - queue, event) + y_buffer, y_offset, y_inc, + queue, event, do_test_matrix_a) ); } catch (...) 
{ return static_cast(clblast::DispatchExceptionForC()); } } diff --git a/src/routines/level2/xgbmv.cpp b/src/routines/level2/xgbmv.cpp index e80b9a96..e0e71939 100644 --- a/src/routines/level2/xgbmv.cpp +++ b/src/routines/level2/xgbmv.cpp @@ -35,13 +35,16 @@ void Xgbmv::DoGbmv(const Layout layout, const Transpose a_transpose, const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const bool do_test_matrix_a) { // Reverses the upper and lower band count auto rotated = (layout == Layout::kRowMajor); auto kl_real = (rotated) ? ku : kl; auto ku_real = (rotated) ? kl : ku; + // The matrix A has different constraints compared to what is normally tested in MatVec below + TestMatrixBanded(n, kl, ku, a_buffer, a_offset, a_ld); + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. // The specific hermitian matrix-accesses are implemented in the kernel guarded by the // ROUTINE_GBMV define. 
@@ -52,7 +55,7 @@ void Xgbmv::DoGbmv(const Layout layout, const Transpose a_transpose, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, fast_kernels, fast_kernels, - 0, false, kl_real, ku_real); + 0, false, kl_real, ku_real,/*do_test_matrix_a=*/do_test_matrix_a); } // ================================================================================================= diff --git a/src/routines/level2/xgbmv.hpp b/src/routines/level2/xgbmv.hpp index e5f670ec..ea76361f 100644 --- a/src/routines/level2/xgbmv.hpp +++ b/src/routines/level2/xgbmv.hpp @@ -39,7 +39,7 @@ class Xgbmv: public Xgemv { const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const bool do_test_matrix_a=true); }; // ================================================================================================= diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index 63dab9f7..2ab2b5d9 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -39,7 +39,7 @@ void Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const bool do_test_matrix_a) { // Performs the matrix-vector multiplication MatVec(layout, a_transpose, @@ -48,7 +48,7 @@ void Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, true, true, - 0, false, 0, 0); // N/A for this routine + 0, false, 0, 0,do_test_matrix_a); // N/A for this routine } // 
================================================================================================= @@ -64,7 +64,7 @@ void Xgemv::MatVec(const Layout layout, const Transpose a_transpose, const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, bool fast_kernel, bool fast_kernel_rot, const size_t parameter, const bool packed, - const size_t kl, const size_t ku) { + const size_t kl, const size_t ku,const bool do_test_matrix_a) { // Makes sure all dimensions are larger than zero if (m == 0 || n == 0) { throw BLASError(StatusCode::kInvalidDimension); } @@ -92,7 +92,8 @@ void Xgemv::MatVec(const Layout layout, const Transpose a_transpose, // Tests the matrix and the vectors for validity if (packed) { TestMatrixAP(n, a_buffer, a_offset); } - else { TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } + else if (do_test_matrix_a) { TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } + TestVectorX(n_real, x_buffer, x_offset, x_inc); TestVectorY(m_real, y_buffer, y_offset, y_inc); diff --git a/src/routines/level2/xgemv.hpp b/src/routines/level2/xgemv.hpp index 1e1fa726..e2452c43 100644 --- a/src/routines/level2/xgemv.hpp +++ b/src/routines/level2/xgemv.hpp @@ -34,7 +34,7 @@ class Xgemv: public Routine { const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const bool do_test_matrix_a=true); // Generic version used also for other matrix-vector multiplications void MatVec(const Layout layout, const Transpose a_transpose, @@ -46,7 +46,7 @@ class Xgemv: public Routine { const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, bool fast_kernel, bool fast_kernel_rot, const size_t parameter, const bool packed, - const size_t kl, const size_t ku); + const size_t kl, const size_t ku, const bool do_test_matrix_a = true); }; // 
================================================================================================= diff --git a/src/routines/level2/xhbmv.cpp b/src/routines/level2/xhbmv.cpp index c7c9ed9d..0b0e9df7 100644 --- a/src/routines/level2/xhbmv.cpp +++ b/src/routines/level2/xhbmv.cpp @@ -35,7 +35,7 @@ void Xhbmv::DoHbmv(const Layout layout, const Triangle triangle, const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const bool do_test_matrix_a) { // The data is either in the upper or lower triangle size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || @@ -51,7 +51,7 @@ void Xhbmv::DoHbmv(const Layout layout, const Triangle triangle, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, fast_kernels, fast_kernels, - is_upper, false, k, 0); + is_upper, false, k, 0,do_test_matrix_a); } // ================================================================================================= diff --git a/src/routines/level2/xhbmv.hpp b/src/routines/level2/xhbmv.hpp index 76d3c91e..920b178c 100644 --- a/src/routines/level2/xhbmv.hpp +++ b/src/routines/level2/xhbmv.hpp @@ -39,7 +39,7 @@ class Xhbmv: public Xgemv { const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const bool do_test_matrix_a=true); }; // ================================================================================================= diff --git a/src/routines/level2/xsbmv.cpp b/src/routines/level2/xsbmv.cpp index e47430d1..2d75c821 100644 --- a/src/routines/level2/xsbmv.cpp +++ b/src/routines/level2/xsbmv.cpp @@ -35,7 
+35,7 @@ void Xsbmv::DoSbmv(const Layout layout, const Triangle triangle, const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc,const bool do_test_matrix_a) { // The data is either in the upper or lower triangle size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || @@ -51,7 +51,7 @@ void Xsbmv::DoSbmv(const Layout layout, const Triangle triangle, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, fast_kernels, fast_kernels, - is_upper, false, k, 0); + is_upper, false, k, 0,do_test_matrix_a); } // ================================================================================================= diff --git a/src/routines/level2/xsbmv.hpp b/src/routines/level2/xsbmv.hpp index a4542f49..f16edcfd 100644 --- a/src/routines/level2/xsbmv.hpp +++ b/src/routines/level2/xsbmv.hpp @@ -39,7 +39,7 @@ class Xsbmv: public Xgemv { const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const bool do_test_matrix_a=true); }; // ================================================================================================= diff --git a/src/routines/level2/xtbmv.cpp b/src/routines/level2/xtbmv.cpp index 87053deb..b23154a5 100644 --- a/src/routines/level2/xtbmv.cpp +++ b/src/routines/level2/xtbmv.cpp @@ -33,7 +33,7 @@ void Xtbmv::DoTbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const size_t n, const size_t k, const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc) 
{ + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc,const bool do_test_matrix_a ) { // Creates a copy of X: a temporary scratch buffer const auto x_size = (1 + (n - 1) * x_inc) + x_offset; @@ -58,7 +58,7 @@ void Xtbmv::DoTbmv(const Layout layout, const Triangle triangle, scratch_buffer, x_offset, x_inc, ConstantZero(), x_buffer, x_offset, x_inc, fast_kernels, fast_kernels, - parameter, false, k, 0); + parameter, false, k, 0,do_test_matrix_a); } catch (BLASError &e) { // Returns the proper error code (renames vector Y to X) switch (e.status()) { diff --git a/src/routines/level2/xtbmv.hpp b/src/routines/level2/xtbmv.hpp index abd12db6..dcd4f00c 100644 --- a/src/routines/level2/xtbmv.hpp +++ b/src/routines/level2/xtbmv.hpp @@ -39,7 +39,7 @@ class Xtbmv: public Xgemv { const Transpose a_transpose, const Diagonal diagonal, const size_t n, const size_t k, const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc); + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const bool do_test_matrix_a = true); }; // ================================================================================================= diff --git a/src/utilities/buffer_test.hpp b/src/utilities/buffer_test.hpp index 4a2a2c95..b55e0671 100644 --- a/src/utilities/buffer_test.hpp +++ b/src/utilities/buffer_test.hpp @@ -22,6 +22,16 @@ namespace clblast { // ================================================================================================= +// Tests matrix 'A' (for banded matrix-vector computations) for validity: the band storage holds kl + ku + 1 rows, so 'ld' must be at least that, and the buffer must hold 'ld * n' elements past 'offset' +template +void TestMatrixBanded(const size_t n, const size_t kl, const size_t ku, const Buffer &buffer, + const size_t offset, const size_t ld, const bool test_lead_dim = true) { + if (test_lead_dim && ld < kl + ku + 1) { throw BLASError(StatusCode::kInvalidLeadDimA); } + try { + const auto required_size = (ld * n + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { 
throw BLASError(StatusCode::kInsufficientMemoryA); } + } catch (const Error &e) { throw BLASError(StatusCode::kInvalidMatrixA, e.what()); } +} // Tests matrix 'A' for validity template