Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gesvd return empty result on cuda backend if M<N #561

Open
npolina4 opened this issue Aug 27, 2024 · 1 comment
Open

gesvd return empty result on cuda backend if M<N #561

npolina4 opened this issue Aug 27, 2024 · 1 comment

Comments

@npolina4
Copy link

Summary

oneapi::mkl::lapack::gesvd return empty result on cuda backend if M<N.

Version

GIT_TAG f2d2dcb

Environment

  • HW you use: NVIDIA CUDA BACKEND, NVIDIA GeForce GT 1030 6.1 [CUDA 12.2]
  • OS name and version: Ubuntu 22.04.3 LTS (GNU/Linux 6.2.0-32-generic x86_64)
  • Compiler version: Intel(R) oneAPI DPC++/C++ Compiler 2024.2.0 (2024.2.0.20240602), cuda version 12.2

Example

Using example from https://www.intel.com/content/www/us/en/docs/onemkl/code-samples-lapack/2022-1/cgesvd-example-c.html
test.cpp

#include <sycl/sycl.hpp>
#include <vector>
#include <iostream>
#include <string>
#include "oneapi/mkl.hpp"

using namespace sycl;

/* Parameters */
#define M 3
#define N 4
#define LDA M
#define LDU M
#define LDVT N

typedef std::complex<float> fcomplex;
namespace mkl_lapack = oneapi::mkl::lapack;


/* Auxiliary routine: printing a matrix */
void print_matrix_u(char* desc, int m, int n, fcomplex* _a, int lda, sycl::queue q) {
    int i, j;
    printf("\n %s\n", desc);
    fcomplex a[LDU * M];
    q.copy<fcomplex>(_a, a, LDU * M).wait();
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++)
            printf(" (%6.2f,%6.2f)", a[i + j * lda].real(), a[i + j * lda].imag());
        printf("\n");
    }
}

/* Auxiliary routine: printing a matrix */
void print_matrix_vt(char* desc, int m, int n, fcomplex* _a, int lda, sycl::queue q) {
    int i, j;
    printf("\n %s\n", desc);
    fcomplex a[LDVT * N];
    q.copy<fcomplex>(_a, a, LDVT * N).wait();
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++)
            printf(" (%6.2f,%6.2f)", a[i + j * lda].real(), a[i + j * lda].imag());
        printf("\n");
    }
}

/* Auxiliary routine: printing a real matrix */
void print_rmatrix(char* desc, int m, int n, float* _a, int lda, sycl::queue q) {
    int i, j;
    printf("\n %s\n", desc);
    float a[M];
    q.copy<float>(_a, a, M).wait();
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) printf(" %6.2f", a[i + j * lda]);
        printf("\n");
    }
}

/* Main program */
int main() {
    /* Locals */
    int m = M, n = N, lda = LDA, ldu = LDU, ldvt = LDVT;
    sycl::queue q;
    std::cout << "Device name is: " << q.get_device().get_info<sycl::info::device::name>() << std::endl;
    float* s = sycl::malloc_device<float>(M, q);
    fcomplex* u = sycl::malloc_device<fcomplex>(LDU * M, q);
    fcomplex* vt = sycl::malloc_device<fcomplex>(LDVT * N, q);
    fcomplex a_host[LDA * N] = {
       { 5.91f, -5.69f}, {-3.15f, -4.08f}, {-4.89f,  4.20f},
       { 7.09f,  2.72f}, {-1.89f,  3.27f}, { 4.10f, -6.70f},
       { 7.78f, -4.06f}, { 4.57f, -2.07f}, { 3.28f, -3.84f},
       {-0.79f, -7.21f}, {-3.88f, -3.30f}, { 3.84f,  1.19f}
    };

    fcomplex* a = sycl::malloc_device<fcomplex>(LDA * N, q);
    const sycl::event& copy_a = q.copy<fcomplex>(a_host, a, LDA * N);
    const std::int64_t scratchpad_size = 10240;
    fcomplex* scratchpad = sycl::malloc_device<fcomplex>(scratchpad_size, q);

    mkl_lapack::gesvd(q, oneapi::mkl::jobsvd::vectors, oneapi::mkl::jobsvd::vectors, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, { copy_a }).wait();

    printf(" GESVD Example Program Results\n");
    print_rmatrix("Singular values", 1, m, s, 1, q);
    print_matrix_u("Left singular vectors (stored columnwise)", m, m, u, ldu, q);
    print_matrix_vt("Right singular vectors (stored rowwise)", m, n, vt, ldvt, q);

    sycl::free(s, q);
    sycl::free(u, q);
    sycl::free(vt, q);
    sycl::free(a, q);
    exit(0);
}

icpx -fsycl -fsycl-targets=nvptx64-nvidia-cuda test.cpp libonemkl.so.0 libonemkl_lapack_cusolver.so.0

Observed behavior

GESVD Example Program Results

Singular values
0.00 0.00 0.00

Left singular vectors (stored columnwise)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)

Right singular vectors (stored rowwise)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)

Expected behavior

Expected out:
GESVD Example Program Results

Singular values
17.63 11.61 6.78

Left singular vectors(stored columnwise)
(-0.86, -0.00) (0.40, -0.00) (0.32, 0.00)
(-0.35, 0.13) (-0.24, -0.21) (-0.63, 0.60)
(0.15, 0.32) (0.61, 0.61) (-0.36, 0.10)

Right singular vectors(stored rowwise)
(-0.22, 0.51) (-0.37, -0.32) (-0.53, 0.11) (0.15, 0.38)
(0.31, 0.31) (0.09, -0.57) (0.18, -0.39) (0.38, -0.39)
(0.53, 0.24) (0.49, 0.28) (-0.47, -0.25) (-0.15, 0.19)

@JackAKirk
Copy link
Contributor

This is explained in the cuda documentation here https://docs.nvidia.com/cuda/cusolver/index.html#cuSolverDN-lt-t-gt-gesvd
In particular the line:

"Remark 1: gesvd only supports m>=n"

However it would be better if the cusolver backend of oneMKL lapack reported this as an error to the user.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants