Skip to content

Commit

Permalink
Fixed failing iamin tests
Browse files Browse the repository at this point in the history
  • Loading branch information
muhammad-tanvir-1211 committed Nov 27, 2023
1 parent 773595c commit 3219bcb
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 12 deletions.
5 changes: 3 additions & 2 deletions src/interface/blas1/backend/amd_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@ typename sb_handle_t::event_t _iamin(
constexpr int localSize = 512;
const index_t nWG = std::min((_N + localSize - 1) / (localSize * 4),
static_cast<index_t>(512));
return blas::internal::_iamax_iamin_impl<localSize, 0, false, false>(
sb_handle, _N, _vx, _incx, _rs, nWG, _dependencies);
return blas::internal::_iamax_iamin_impl<localSize, localSize, false,
false>(sb_handle, _N, _vx, _incx,
_rs, nWG, _dependencies);
}
}
} // namespace backend
Expand Down
6 changes: 2 additions & 4 deletions src/interface/blas1/backend/default_cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,11 @@ template <typename sb_handle_t, typename container_0_t, typename container_1_t,
typename sb_handle_t::event_t _iamax(
sb_handle_t& sb_handle, index_t _N, container_0_t _vx, increment_t _incx,
container_1_t _rs, const typename sb_handle_t::event_t& _dependencies) {
constexpr int localSize = 128;
if (_N < 8192) {
constexpr int localSize = 128;
return blas::internal::_iamax_iamin_impl<localSize, 0, true, true>(
sb_handle, _N, _vx, _incx, _rs, static_cast<index_t>(1), _dependencies);
} else {
constexpr int localSize = 128;
const index_t nWG = std::min((_N + localSize - 1) / (localSize * 4),
static_cast<index_t>(512));
return blas::internal::_iamax_iamin_impl<localSize, 0, true, false>(
Expand All @@ -71,12 +70,11 @@ template <typename sb_handle_t, typename container_0_t, typename container_1_t,
typename sb_handle_t::event_t _iamin(
sb_handle_t& sb_handle, index_t _N, container_0_t _vx, increment_t _incx,
container_1_t _rs, const typename sb_handle_t::event_t& _dependencies) {
constexpr int localSize = 128;
if (_N < 8192) {
constexpr int localSize = 128;
return blas::internal::_iamax_iamin_impl<localSize, 0, false, true>(
sb_handle, _N, _vx, _incx, _rs, static_cast<index_t>(1), _dependencies);
} else {
constexpr int localSize = 128;
const index_t nWG = std::min((_N + localSize - 1) / (localSize * 4),
static_cast<index_t>(512));
return blas::internal::_iamax_iamin_impl<localSize, 0, false, false>(
Expand Down
6 changes: 2 additions & 4 deletions src/interface/blas1/backend/intel_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,11 @@ template <typename sb_handle_t, typename container_0_t, typename container_1_t,
typename sb_handle_t::event_t _iamax(
sb_handle_t& sb_handle, index_t _N, container_0_t _vx, increment_t _incx,
container_1_t _rs, const typename sb_handle_t::event_t& _dependencies) {
constexpr int localSize = 128;
if (_N < 8192) {
constexpr int localSize = 128;
return blas::internal::_iamax_iamin_impl<localSize, localSize, true, true>(
sb_handle, _N, _vx, _incx, _rs, static_cast<index_t>(1), _dependencies);
} else {
constexpr int localSize = 128;
const index_t nWG = std::min((_N + localSize - 1) / (localSize * 4),
static_cast<index_t>(512));
return blas::internal::_iamax_iamin_impl<localSize, localSize, true, false>(
Expand All @@ -72,12 +71,11 @@ template <typename sb_handle_t, typename container_0_t, typename container_1_t,
typename sb_handle_t::event_t _iamin(
sb_handle_t& sb_handle, index_t _N, container_0_t _vx, increment_t _incx,
container_1_t _rs, const typename sb_handle_t::event_t& _dependencies) {
constexpr int localSize = 128;
if (_N < 8192) {
constexpr int localSize = 128;
return blas::internal::_iamax_iamin_impl<localSize, localSize, false, true>(
sb_handle, _N, _vx, _incx, _rs, static_cast<index_t>(1), _dependencies);
} else {
constexpr int localSize = 128;
const index_t nWG = std::min((_N + localSize - 1) / (localSize * 4),
static_cast<index_t>(512));
return blas::internal::_iamax_iamin_impl<localSize, localSize, false,
Expand Down
7 changes: 5 additions & 2 deletions src/interface/blas1_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ typename sb_handle_t::event_t _asum_impl(
*
* @tparam localSize value to indicate work group size
* @tparam localMemSize value to indicate size of local memory required
* @tparam is_max boolean variable to indicate if required operation is
* @tparam is_max boolean variable to indicate whether the required operation
* is iamax (true) or iamin (false)
* @tparam single boolean variable to indicate whether to execute a single
* step reduction or a two step reduction
Expand Down Expand Up @@ -322,7 +322,10 @@ typename sb_handle_t::event_t _iamax_iamin_impl(
auto step0 = make_integer_max_min<is_max, true>(gpu_res_vec, tupOp);
auto step1 = make_integer_max_min<is_max, false>(rs, gpu_res_vec);
if constexpr (localMemSize == 0) {
tuple_t init{(index_t)0, (scalar_t)0.f};
const scalar_t val = is_max ? static_cast<scalar_t>(0)
: std::numeric_limits<scalar_t>::max();
const index_t idx = std::numeric_limits<index_t>::max();
const tuple_t init{idx, val};
ret = typename sb_handle_t::event_t{
helper::fill(q, gpu_res, init, memory_size, _dependencies)};
ret = concatenate_vectors(
Expand Down

0 comments on commit 3219bcb

Please sign in to comment.