Skip to content

Commit

Permalink
Prevent bdsqr_estimate from rounding t to zero (#819)
Browse files Browse the repository at this point in the history
In bdsqr_estimate, the tolerance used to decide that elements of E have converged is modified by the value t, which changes as the loop traverses elements of D and E. Thanks to the order of operations used to calculate it, the updated t may be rounded down to zero in some cases, which brings the tolerance down to zero and prevents elements of E from being considered to have converged.

* Prevent bdsqr_estimate from rounding t to zero

* Addressed review comments

* Addressed review comment
  • Loading branch information
tfalders authored Sep 16, 2024
1 parent 52f7f39 commit 8d183cf
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 42 deletions.
75 changes: 49 additions & 26 deletions clients/common/lapack/testing_gesvd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,23 +206,38 @@ void gesvd_initData(const rocblas_handle handle,
const rocblas_int bc,
Th& hA,
std::vector<T>& A,
bool test = true)
const bool test,
const bool singular)
{
if(CPU)
{
rocblas_init<T>(hA, true);

for(rocblas_int b = 0; b < bc; ++b)
{
// scale A to avoid singularities
for(rocblas_int i = 0; i < m; i++)
if(!singular)
{
for(rocblas_int j = 0; j < n; j++)
// scale A to avoid singularities
for(rocblas_int i = 0; i < m; i++)
{
if(i == j)
hA[b][i + j * lda] += 400;
else
hA[b][i + j * lda] -= 4;
for(rocblas_int j = 0; j < n; j++)
{
if(i == j)
hA[b][i + j * lda] += 400;
else
hA[b][i + j * lda] -= 4;
}
}
}
else
{
// form a singular matrix consisting of all ones
for(rocblas_int i = 0; i < m; i++)
{
for(rocblas_int j = 0; j < n; j++)
{
hA[b][i + j * lda] = 1;
}
}
}

Expand Down Expand Up @@ -289,7 +304,8 @@ void gesvd_getError(const rocblas_handle handle,
Ih& hinfo,
Ih& hinfoRes,
double* max_err,
double* max_errv)
double* max_errv,
const bool singular)
{
using W = decltype(std::real(T{}));

Expand All @@ -300,7 +316,8 @@ void gesvd_getError(const rocblas_handle handle,
std::vector<T> A(lda * n * bc);

// input data initialization
gesvd_initData<true, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A);
gesvd_initData<true, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, true,
singular);

// execute computations:
// complementary execution to compute all singular vectors if needed (always in-place to ensure
Expand All @@ -315,7 +332,8 @@ void gesvd_getError(const rocblas_handle handle,
if(right_svect == rocblas_svect_none && left_svect != rocblas_svect_none)
CHECK_HIP_ERROR(Vres.transfer_from(dVT));

gesvd_initData<false, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A);
gesvd_initData<false, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, true,
singular);

// CPU lapack
for(rocblas_int b = 0; b < bc; ++b)
Expand Down Expand Up @@ -436,7 +454,8 @@ void gesvd_getPerfData(const rocblas_handle handle,
const rocblas_int hot_calls,
const int profile,
const bool profile_kernels,
const bool perf)
const bool perf,
const bool singular)
{
using W = decltype(std::real(T{}));

Expand All @@ -448,7 +467,8 @@ void gesvd_getPerfData(const rocblas_handle handle,

if(!perf)
{
gesvd_initData<true, false, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0);
gesvd_initData<true, false, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A,
false, singular);

// cpu-lapack performance (only if not in perf mode)
*cpu_time_used = get_time_us_no_sync();
Expand All @@ -458,12 +478,14 @@ void gesvd_getPerfData(const rocblas_handle handle,
*cpu_time_used = get_time_us_no_sync() - *cpu_time_used;
}

gesvd_initData<true, false, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0);
gesvd_initData<true, false, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, false,
singular);

// cold calls
for(int iter = 0; iter < 2; iter++)
{
gesvd_initData<false, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0);
gesvd_initData<false, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A,
false, singular);

CHECK_ROCBLAS_ERROR(rocsolver_gesvd(
STRIDED, handle, left_svect, right_svect, m, n, dA.data(), lda, stA, dS.data(), stS,
Expand All @@ -487,7 +509,8 @@ void gesvd_getPerfData(const rocblas_handle handle,

for(rocblas_int iter = 0; iter < hot_calls; iter++)
{
gesvd_initData<false, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0);
gesvd_initData<false, true, T>(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A,
false, singular);

start = get_time_us_sync(stream);
rocsolver_gesvd(STRIDED, handle, left_svect, right_svect, m, n, dA.data(), lda, stA,
Expand Down Expand Up @@ -768,16 +791,16 @@ void testing_gesvd(Arguments& argus)
stU, dV, ldv, stV, dE, stE, fa, dinfo, bc, leftvT, rightvT,
mT, nT, dUT, lduT, stUT, dVT, ldvT, stVT, hA, hS, hSres, hU,
Ures, ldures, hV, Vres, ldvres, hinfo, hinfoRes, &max_error,
&max_errorv);
&max_errorv, argus.singular);
}

// collect performance data
if(argus.timing)
{
gesvd_getPerfData<STRIDED, T>(handle, leftv, rightv, m, n, dA, lda, stA, dS, stS, dU,
ldu, stU, dV, ldv, stV, dE, stE, fa, dinfo, bc, hA, hS,
hU, hV, hinfo, &gpu_time_used, &cpu_time_used, hot_calls,
argus.profile, argus.profile_kernels, argus.perf);
gesvd_getPerfData<STRIDED, T>(
handle, leftv, rightv, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dE,
stE, fa, dinfo, bc, hA, hS, hU, hV, hinfo, &gpu_time_used, &cpu_time_used,
hot_calls, argus.profile, argus.profile_kernels, argus.perf, argus.singular);
}
}

Expand Down Expand Up @@ -810,16 +833,16 @@ void testing_gesvd(Arguments& argus)
stU, dV, ldv, stV, dE, stE, fa, dinfo, bc, leftvT, rightvT,
mT, nT, dUT, lduT, stUT, dVT, ldvT, stVT, hA, hS, hSres, hU,
Ures, ldures, hV, Vres, ldvres, hinfo, hinfoRes, &max_error,
&max_errorv);
&max_errorv, argus.singular);
}

// collect performance data
if(argus.timing)
{
gesvd_getPerfData<STRIDED, T>(handle, leftv, rightv, m, n, dA, lda, stA, dS, stS, dU,
ldu, stU, dV, ldv, stV, dE, stE, fa, dinfo, bc, hA, hS,
hU, hV, hinfo, &gpu_time_used, &cpu_time_used, hot_calls,
argus.profile, argus.profile_kernels, argus.perf);
gesvd_getPerfData<STRIDED, T>(
handle, leftv, rightv, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dE,
stE, fa, dinfo, bc, hA, hS, hU, hV, hinfo, &gpu_time_used, &cpu_time_used,
hot_calls, argus.profile, argus.profile_kernels, argus.perf, argus.singular);
}
}

Expand Down
34 changes: 19 additions & 15 deletions clients/gtest/lapack/gesvd_gtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ using namespace std;

typedef std::tuple<vector<int>, vector<int>> gesvd_tuple;

// each size_range vector is a {m, n, fa};
// each size_range vector is a {m, n, fa, singular};
// if fa = 0 then no fast algorithm is allowed
// if fa = 1 fast algorithm is used when possible
// of singular = 1 then test with a singular matrix of all ones

// each opt_range vector is a {lda, ldu, ldv, leftsv, rightsv};
// if ldx = -1 then ldx < limit (invalid size)
Expand All @@ -54,21 +55,22 @@ typedef std::tuple<vector<int>, vector<int>> gesvd_tuple;
// for checkin_lapack tests
const vector<vector<int>> size_range = {
// quick return
{0, 0, 0},
{0, 1, 0},
{1, 0, 0},
{0, 0, 0, 0},
{0, 1, 0, 0},
{1, 0, 0, 0},
// invalid
{-1, 1, 0},
{1, -1, 0},
{-1, 1, 0, 0},
{1, -1, 0, 0},
// normal (valid) samples
{1, 1, 0},
{20, 20, 0},
{40, 30, 0},
{60, 30, 0},
{60, 30, 1},
{30, 40, 0},
{30, 60, 0},
{30, 60, 1}};
{1, 1, 0, 0},
{15, 15, 0, 1},
{20, 20, 0, 0},
{40, 30, 0, 0},
{60, 30, 0, 0},
{60, 30, 1, 0},
{30, 40, 0, 0},
{30, 60, 0, 0},
{30, 60, 1, 0}};

const vector<vector<int>> opt_range = {
// invalid
Expand All @@ -95,7 +97,8 @@ const vector<vector<int>> opt_range = {

// for daily_lapack tests
const vector<vector<int>> large_size_range
= {{120, 100, 0}, {300, 120, 0}, {300, 120, 1}, {100, 120, 1}, {120, 300, 0}, {120, 300, 1}};
= {{100, 100, 0, 1}, {120, 100, 0, 0}, {300, 120, 0, 0}, {300, 120, 1, 0},
{100, 120, 1, 0}, {120, 300, 0, 0}, {120, 300, 1, 0}};

const vector<vector<int>> large_opt_range
= {{0, 0, 0, 3, 3}, {1, 0, 0, 0, 1}, {0, 1, 0, 1, 0}, {0, 0, 1, 1, 1},
Expand Down Expand Up @@ -150,6 +153,7 @@ Arguments gesvd_setup_arguments(gesvd_tuple tup)
// only testing standard use case/defaults for strides

arg.timing = 0;
arg.singular = size[3];

return arg;
}
Expand Down
5 changes: 4 additions & 1 deletion library/src/auxiliary/rocauxiliary_bdsqr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ __device__ T bdsqr_estimate(const rocblas_int n, T* D, T* E, int t2b, T tol, int
smin = -1;
break;
}
t = std::abs(D[jd]) * t / (t + std::abs(E[je]));

// Note: Order of operations is important to prevent t from being rounded down to zero
t = std::abs(D[jd]) * (t / (t + std::abs(E[je])));

smin = (t < smin) ? t : smin;
}

Expand Down

0 comments on commit 8d183cf

Please sign in to comment.