diff --git a/benchmark/syclblas/extension/omatcopy.cpp b/benchmark/syclblas/extension/omatcopy.cpp index c6a655407..4394ceabb 100644 --- a/benchmark/syclblas/extension/omatcopy.cpp +++ b/benchmark/syclblas/extension/omatcopy.cpp @@ -23,6 +23,7 @@ * **************************************************************************/ +#include "../../../test/unittest/extension/extension_reference.hpp" #include "../utils.hpp" template @@ -74,8 +75,9 @@ void run(benchmark::State& state, blas::SB_Handle* sb_handle_ptr, int ti, #ifdef BLAS_VERIFY_BENCHMARK // Run a first time with a verification of the results std::vector m_b_ref = m_b; - reference_blas::omatcopy(*t_str, m, n, alpha, m_a.data(), lda, m_b_ref.data(), - ldb); + + reference_blas::omatcopy_ref(*t_str, m, n, alpha, m_a, lda, m_b_ref, ldb); + std::vector m_b_temp = m_b; { auto m_b_temp_gpu = diff --git a/common/include/common/system_reference_blas.hpp b/common/include/common/system_reference_blas.hpp index 01a76772b..d2581072d 100644 --- a/common/include/common/system_reference_blas.hpp +++ b/common/include/common/system_reference_blas.hpp @@ -422,14 +422,6 @@ void syr2k(const char *uplo, const char *trans, int n, int k, scalar_t alpha, ldb, beta, c, ldc); } -// blas-like extensions -template -void omatcopy(char trans, int m, int n, scalar_t alpha, scalar_t *a, int lda, - scalar_t *b, int ldb) { - auto func = - blas_system_function(&cblas_somatcopy, &cblas_domatcopy); - func(CblasColMajor, c_trans(trans), m, n, alpha, a, lda, b, ldb); -} } // namespace reference_blas #endif /* end of include guard: SYSTEM_REFERENCE_BLAS_HPP */ diff --git a/external/cblas/include/cblas.h b/external/cblas/include/cblas.h index 36c2557b1..e6fa818e9 100644 --- a/external/cblas/include/cblas.h +++ b/external/cblas/include/cblas.h @@ -562,16 +562,6 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, void cblas_xerbla(int p, const char *rout, const char *form, ...); -// blas-like extensions -void 
cblas_somatcopy(const enum CBLAS_ORDER CORDER, - const enum CBLAS_TRANSPOSE CTRANS, const int crows, - const int ccols, const float calpha, const float *a, - const int clda, float *b, const int cldb); -void cblas_domatcopy(const enum CBLAS_ORDER CORDER, - const enum CBLAS_TRANSPOSE CTRANS, const int crows, - const int ccols, const double calpha, const double *a, - const int clda, double *b, const int cldb); - #ifdef __cplusplus } #endif diff --git a/test/unittest/extension/extension_reference.hpp b/test/unittest/extension/extension_reference.hpp new file mode 100644 index 000000000..592e9024a --- /dev/null +++ b/test/unittest/extension/extension_reference.hpp @@ -0,0 +1,94 @@ +/*************************************************************************** + * + * @license + * Copyright (C) Codeplay Software Limited + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * For your convenience, a copy of the License has been included in this + * repository. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCL-BLAS: BLAS implementation using SYCL + * + * @filename extension_reference.hpp + * + **************************************************************************/ +#ifndef SYCL_BLAS_EXTENSION_REFERENCE_IMPLEMENTATION_HPP +#define SYCL_BLAS_EXTENSION_REFERENCE_IMPLEMENTATION_HPP + +#include + +namespace reference_blas { +/*! 
+ * @brief Host-baseline implementation for omatcopy used as reference in test + * and benchmark: B = alpha * op(A), with A(i, j) read from A[j * lda + i] + * @param trans 't' writes the transpose of A; any other value copies A as is + * @param m,n rows and columns of the input matrix A + * @param alpha scalar factor applied to every copied element + * @param A input matrix (only read) + * @param lda leading dimension of input matrix + * @param B output matrix, written at [j * ldb + i], or [i * ldb + j] for 't' + * @param ldb leading dimension of output matrix + */ +template +void omatcopy_ref(char trans, const index_t m, const index_t n, + const scalar_t alpha, std::vector& A, + const index_t lda, std::vector& B, index_t ldb) { + if (trans != 't') { + for (index_t j = 0; j < n; j++) { + for (index_t i = 0; i < m; i++) { + B[j * ldb + i] = alpha * A[j * lda + i]; + } + } + } else { + for (index_t j = 0; j < n; j++) { + for (index_t i = 0; i < m; i++) { + B[i * ldb + j] = alpha * A[j * lda + i]; + } + } + } +} + +/*! + * @brief Host reference for omatcopy2 (omatcopy with inc_in/inc_out strides). + */ +template +void omatcopy2_ref(const char& t, const index_t& m, const index_t& n, + const scalar_t& alpha, std::vector& in_matrix, + const index_t& ld_in, const index_t& inc_in, + std::vector& out_matrix, const index_t& ld_out, + const index_t inc_out) { + if (t == 't') { + for (int i = 0; i < m; ++i) { + for (int j = 0, c = 0; j < n; ++j, ++c) { + { + out_matrix[j * inc_out + i * ld_out] = + alpha * in_matrix[i * inc_in + j * ld_in]; + } + } + } + } else { + for (int i = 0; i < n; ++i) { + for (int j = 0, c = 0; j < m; ++j, ++c) { + { + out_matrix[j * inc_out + i * ld_out] = + alpha * in_matrix[j * inc_in + i * ld_in]; + } + } + } + } + return; +} + +} // namespace reference_blas + +#endif diff --git a/test/unittest/extension/omatcopy2_test.cpp b/test/unittest/extension/omatcopy2_test.cpp index ef822ef18..27a3764bc 100644 --- a/test/unittest/extension/omatcopy2_test.cpp +++ b/test/unittest/extension/omatcopy2_test.cpp @@ -24,44 +24,12 @@ **************************************************************************/ #include "blas_test.hpp" +#include 
"extension_reference.hpp" template using combination_t = std::tuple; -namespace reference_blas { -/*! - * @brief Host-baseline implementation of omatcopy2 used as reference. - */ -template -std::enable_if_t> omatcopy2( - const char& t, const index_t& m, const index_t& n, const scalar_t& alpha, - std::vector& in_matrix, const index_t& ld_in, - const index_t& inc_in, std::vector& out_matrix, - const index_t& ld_out, const index_t inc_out) { - if (t == 't') { - for (int i = 0; i < m; ++i) { - for (int j = 0, c = 0; j < n; ++j, ++c) { - { - out_matrix[j * inc_out + i * ld_out] = - alpha * in_matrix[i * inc_in + j * ld_in]; - } - } - } - } else { - for (int i = 0; i < n; ++i) { - for (int j = 0, c = 0; j < m; ++j, ++c) { - { - out_matrix[j * inc_out + i * ld_out] = - alpha * in_matrix[j * inc_in + i * ld_in]; - } - } - } - } - return out_matrix; -} -} // namespace reference_blas - template void run_test(const combination_t combi) { char trans; @@ -94,11 +62,11 @@ void run_test(const combination_t combi) { std::vector B_ref = B; // Reference implementation - // TODO: There isn't a reference implementation from any library. So we compare - // the results with a basic host implementation above. Working on a better - // comparison. - reference_blas::omatcopy2(trans, m, n, alpha, A_ref, ld_in, inc_in, - B_ref, ld_out, inc_out); + // TODO: There isn't a reference implementation from any library. So we + // compare the results with the basic host implementation from + // extension_reference.hpp. Working on a better comparison. 
+ reference_blas::omatcopy2_ref(trans, m, n, alpha, A_ref, ld_in, inc_in, B_ref, + ld_out, inc_out); auto matrix_in = blas::make_sycl_iterator_buffer(A, m_a_size); auto matrix_out = blas::make_sycl_iterator_buffer(B, m_b_size); diff --git a/test/unittest/extension/omatcopy_test.cpp b/test/unittest/extension/omatcopy_test.cpp index 47c21b0e8..01188afc8 100644 --- a/test/unittest/extension/omatcopy_test.cpp +++ b/test/unittest/extension/omatcopy_test.cpp @@ -24,6 +24,7 @@ **************************************************************************/ #include "blas_test.hpp" +#include "extension_reference.hpp" using index_t = int; @@ -58,8 +59,8 @@ void run_test(const combination_t combi) { std::vector B_ref = B; // Reference implementation - reference_blas::omatcopy(trans, m, n, alpha, A_ref.data(), ld_in, - B_ref.data(), ld_out); + reference_blas::omatcopy_ref(trans, m, n, alpha, A_ref, ld_in, + B_ref, ld_out); auto matrix_in = blas::make_sycl_iterator_buffer(A, size_a); auto matrix_out = blas::make_sycl_iterator_buffer(B, size_b);