gesvd return empty result on cuda backend if M<N #561

npolina4 · 2024-08-27T23:26:41Z

Summary

oneapi::mkl::lapack::gesvd return empty result on cuda backend if M<N.

Version

GIT_TAG f2d2dcb

Environment

HW you use: NVIDIA CUDA BACKEND, NVIDIA GeForce GT 1030 6.1 [CUDA 12.2]
OS name and version: Ubuntu 22.04.3 LTS (GNU/Linux 6.2.0-32-generic x86_64)
Compiler version: Intel(R) oneAPI DPC++/C++ Compiler 2024.2.0 (2024.2.0.20240602), cuda version 12.2

Example

Using example from https://www.intel.com/content/www/us/en/docs/onemkl/code-samples-lapack/2022-1/cgesvd-example-c.html
test.cpp

#include <sycl/sycl.hpp>
#include <vector>
#include <iostream>
#include <string>
#include "oneapi/mkl.hpp"

using namespace sycl;

/* Parameters */
#define M 3
#define N 4
#define LDA M
#define LDU M
#define LDVT N

typedef std::complex<float> fcomplex;
namespace mkl_lapack = oneapi::mkl::lapack;


/* Auxiliary routine: printing a matrix */
void print_matrix_u(char* desc, int m, int n, fcomplex* _a, int lda, sycl::queue q) {
    int i, j;
    printf("\n %s\n", desc);
    fcomplex a[LDU * M];
    q.copy<fcomplex>(_a, a, LDU * M).wait();
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++)
            printf(" (%6.2f,%6.2f)", a[i + j * lda].real(), a[i + j * lda].imag());
        printf("\n");
    }
}

/* Auxiliary routine: printing a matrix */
void print_matrix_vt(char* desc, int m, int n, fcomplex* _a, int lda, sycl::queue q) {
    int i, j;
    printf("\n %s\n", desc);
    fcomplex a[LDVT * N];
    q.copy<fcomplex>(_a, a, LDVT * N).wait();
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++)
            printf(" (%6.2f,%6.2f)", a[i + j * lda].real(), a[i + j * lda].imag());
        printf("\n");
    }
}

/* Auxiliary routine: printing a real matrix */
void print_rmatrix(char* desc, int m, int n, float* _a, int lda, sycl::queue q) {
    int i, j;
    printf("\n %s\n", desc);
    float a[M];
    q.copy<float>(_a, a, M).wait();
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) printf(" %6.2f", a[i + j * lda]);
        printf("\n");
    }
}

/* Main program */
int main() {
    /* Locals */
    int m = M, n = N, lda = LDA, ldu = LDU, ldvt = LDVT;
    sycl::queue q;
    std::cout << "Device name is: " << q.get_device().get_info<sycl::info::device::name>() << std::endl;
    float* s = sycl::malloc_device<float>(M, q);
    fcomplex* u = sycl::malloc_device<fcomplex>(LDU * M, q);
    fcomplex* vt = sycl::malloc_device<fcomplex>(LDVT * N, q);
    fcomplex a_host[LDA * N] = {
       { 5.91f, -5.69f}, {-3.15f, -4.08f}, {-4.89f,  4.20f},
       { 7.09f,  2.72f}, {-1.89f,  3.27f}, { 4.10f, -6.70f},
       { 7.78f, -4.06f}, { 4.57f, -2.07f}, { 3.28f, -3.84f},
       {-0.79f, -7.21f}, {-3.88f, -3.30f}, { 3.84f,  1.19f}
    };

    fcomplex* a = sycl::malloc_device<fcomplex>(LDA * N, q);
    const sycl::event& copy_a = q.copy<fcomplex>(a_host, a, LDA * N);
    const std::int64_t scratchpad_size = 10240;
    fcomplex* scratchpad = sycl::malloc_device<fcomplex>(scratchpad_size, q);

    mkl_lapack::gesvd(q, oneapi::mkl::jobsvd::vectors, oneapi::mkl::jobsvd::vectors, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, { copy_a }).wait();

    printf(" GESVD Example Program Results\n");
    print_rmatrix("Singular values", 1, m, s, 1, q);
    print_matrix_u("Left singular vectors (stored columnwise)", m, m, u, ldu, q);
    print_matrix_vt("Right singular vectors (stored rowwise)", m, n, vt, ldvt, q);

    sycl::free(s, q);
    sycl::free(u, q);
    sycl::free(vt, q);
    sycl::free(a, q);
    exit(0);
}

icpx -fsycl -fsycl-targets=nvptx64-nvidia-cuda test.cpp libonemkl.so.0 libonemkl_lapack_cusolver.so.0

Observed behavior

GESVD Example Program Results

Singular values
0.00 0.00 0.00

Left singular vectors (stored columnwise)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)

Right singular vectors (stored rowwise)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)

Expected behavior

Expected out:
GESVD Example Program Results

Singular values
17.63 11.61 6.78

Left singular vectors(stored columnwise)
(-0.86, -0.00) (0.40, -0.00) (0.32, 0.00)
(-0.35, 0.13) (-0.24, -0.21) (-0.63, 0.60)
(0.15, 0.32) (0.61, 0.61) (-0.36, 0.10)

Right singular vectors(stored rowwise)
(-0.22, 0.51) (-0.37, -0.32) (-0.53, 0.11) (0.15, 0.38)
(0.31, 0.31) (0.09, -0.57) (0.18, -0.39) (0.38, -0.39)
(0.53, 0.24) (0.49, 0.28) (-0.47, -0.25) (-0.15, 0.19)

The text was updated successfully, but these errors were encountered:

JackAKirk · 2024-10-10T15:01:36Z

This is explained in the cuda documentation here https://docs.nvidia.com/cuda/cusolver/index.html#cuSolverDN-lt-t-gt-gesvd
In particular the line:

"Remark 1: gesvd only supports m>=n"

However it would be better if the cusolver backend of oneMKL lapack reported this as an error to the user.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

gesvd return empty result on cuda backend if M<N #561

gesvd return empty result on cuda backend if M<N #561

npolina4 commented Aug 27, 2024

JackAKirk commented Oct 10, 2024

gesvd return empty result on cuda backend if M<N #561

gesvd return empty result on cuda backend if M<N #561

Comments

npolina4 commented Aug 27, 2024

Summary

Version

Environment

Example

Observed behavior

Expected behavior

JackAKirk commented Oct 10, 2024