Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow generalized eigensolver to re-use pre-factorized matrix #1167

Merged
merged 16 commits into from
Jun 21, 2024
300 changes: 263 additions & 37 deletions include/dlaf/eigensolver/gen_eigensolver.h

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions include/dlaf/eigensolver/gen_eigensolver/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@
#include <dlaf/types.h>

namespace dlaf::eigensolver::internal {

enum class Factorization { do_factorization, already_factorized };

template <Backend backend, Device device, class T>
struct GenEigensolver {
static void call(blas::Uplo uplo, Matrix<T, device>& mat_a, Matrix<T, device>& mat_b,
Matrix<BaseType<T>, device>& eigenvalues, Matrix<T, device>& eigenvectors);
Matrix<BaseType<T>, device>& eigenvalues, Matrix<T, device>& eigenvectors,
const Factorization factorization);
static void call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix<T, device>& mat_a,
Matrix<T, device>& mat_b, Matrix<BaseType<T>, device>& eigenvalues,
Matrix<T, device>& eigenvectors);
Matrix<T, device>& eigenvectors, const Factorization factorizatio);
RMeli marked this conversation as resolved.
Show resolved Hide resolved
};

// ETI
Expand Down
15 changes: 11 additions & 4 deletions include/dlaf/eigensolver/gen_eigensolver/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,17 @@
#include <dlaf/solver/triangular.h>
#include <dlaf/util_matrix.h>

#include "api.h"

namespace dlaf::eigensolver::internal {

template <Backend B, Device D, class T>
void GenEigensolver<B, D, T>::call(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<T, D>& mat_b,
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors) {
cholesky_factorization<B>(uplo, mat_b);
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors,
const Factorization factorization) {
if (factorization == Factorization::do_factorization) {
cholesky_factorization<B>(uplo, mat_b);
}
generalized_to_standard<B>(uplo, mat_a, mat_b);

hermitian_eigensolver<B>(uplo, mat_a, eigenvalues, eigenvectors);
Expand All @@ -34,8 +39,10 @@ void GenEigensolver<B, D, T>::call(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<
template <Backend B, Device D, class T>
void GenEigensolver<B, D, T>::call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix<T, D>& mat_a,
Matrix<T, D>& mat_b, Matrix<BaseType<T>, D>& eigenvalues,
Matrix<T, D>& eigenvectors) {
cholesky_factorization<B>(grid, uplo, mat_b);
Matrix<T, D>& eigenvectors, const Factorization factorization) {
if (factorization == Factorization::do_factorization) {
cholesky_factorization<B>(grid, uplo, mat_b);
}
generalized_to_standard<B>(grid, uplo, mat_a, mat_b);

hermitian_eigensolver<B>(grid, uplo, mat_a, eigenvalues, eigenvectors);
Expand Down
114 changes: 114 additions & 0 deletions include/dlaf_c/eigensolver/gen_eigensolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,52 @@ DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_z(
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// Generalized eigensolver
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky
/// factorization
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param dlaf_context context associated to the DLA-Future grid created with @ref dlaf_create_grid
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param dlaf_desca DLA-Future descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$
/// @param dlaf_descb DLA-Future descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param dlaf_descz DLA-Future descriptor of the global matrix \f$\mathbf{Z}\f$
/// @return 0 if the eigensolver completed normally
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_s(
const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_d(
const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca,
double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_c(
const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_z(
const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

#ifdef DLAF_WITH_SCALAPACK

/// Generalized eigensolver
Expand Down Expand Up @@ -121,4 +167,72 @@ DLAF_EXTERN_C void dlaf_pzhegvx(const char uplo, const int n, dlaf_complex_z* a,
const int jb, const int descb[9], double* w, dlaf_complex_z* z,
const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT;

/// Generalized eigensolver
///
/// @remark This function is only available when DLAF_WITH_SCALAPACK=ON.
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky
/// factorization
///
/// @pre Submatrices are currently not supported, so @p n is the size of the full matrices
/// \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ and @p ia, @p ja, @p ib, @p jb, @p iz, and
/// @p jz need to be 1.
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param n order of the sumbatrix \f$\mathbf{A}\f$ used in the computation
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param ia row index of the global matrix \f$\mathbf{A}\f$ identifying the first row of the submatrix
/// $A$, has to be 1
/// @param ja column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param desca ScaLAPACK array descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$
/// @param ib row index of the global matrix \f$\mathbf{B}\f$ identifying the first row of the submatrix
/// \f$\mathbf{B}\f$, has to be 1
/// @param jb column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{B}\f$, has to be 1
/// @param descb ScaLAPACK array descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param iz row index of the global matrix \f$\mathbf{Z}\f$ identifying the first row of the submatrix
/// \f$\mathbf{Z}\f$, has to be 1
/// @param jz column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$
/// @param[out] info 0 if the eigensolver completed normally
DLAF_EXTERN_C void dlaf_pssygvx_factorized(const char uplo, const int n, float* a, const int ia,
const int ja, const int desca[9], float* b, const int ib,
const int jb, const int descb[9], float* w, float* z,
const int iz, const int jz, const int descz[9],
int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pdsygvx_factorized(const char uplo, const int n, double* a, const int ia,
const int ja, const int desca[9], double* b, const int ib,
const int jb, const int descb[9], double* w, double* z,
const int iz, const int jz, const int descz[9],
int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pchegvx_factorized(const char uplo, const int n, dlaf_complex_c* a, const int ia,
const int ja, const int desca[9], dlaf_complex_c* b,
const int ib, const int jb, const int descb[9], float* w,
dlaf_complex_c* z, const int iz, const int jz,
const int descz[9], int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pzhegvx_factorized(const char uplo, const int n, dlaf_complex_z* a, const int ia,
const int ja, const int desca[9], dlaf_complex_z* b,
const int ib, const int jb, const int descb[9], double* w,
dlaf_complex_z* z, const int iz, const int jz,
const int descz[9], int* info) DLAF_NOEXCEPT;

#endif
64 changes: 64 additions & 0 deletions src/c_api/eigensolver/gen_eigensolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,40 @@ int dlaf_hermitian_generalized_eigensolver_z(const int dlaf_context, const char
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_symmetric_generalized_eigensolver_factorized_s(
const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<float>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_symmetric_generalized_eigensolver_factorized_d(
const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca,
double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<double>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_factorized_c(
const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<std::complex<float>>(dlaf_context, uplo, a,
dlaf_desca, b, dlaf_descb, w,
z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_factorized_z(
const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<std::complex<double>>(dlaf_context, uplo, a,
dlaf_desca, b, dlaf_descb, w,
z, dlaf_descz);
}

#ifdef DLAF_WITH_SCALAPACK

void dlaf_pssygvx(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9],
Expand Down Expand Up @@ -81,4 +115,34 @@ void dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia,
pxhegvx<std::complex<double>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pssygvx_factorized(const char uplo, const int m, float* a, const int ia, const int ja,
RMeli marked this conversation as resolved.
Show resolved Hide resolved
const int desca[9], float* b, const int ib, const int jb,
const int descb[9], float* w, float* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvx_factorized<float>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, const int ja,
const int desca[9], double* b, const int ib, const int jb,
const int descb[9], double* w, double* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvx_factorized<double>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja,
const int desca[9], dlaf_complex_c* b, const int ib, const int jb,
const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvx_factorized<std::complex<float>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz,
descz, *info);
}

void dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja,
const int desca[9], dlaf_complex_z* b, const int ib, const int jb,
const int descb[9], double* w, dlaf_complex_z* z, const int iz,
const int jz, const int descz[9], int* info) noexcept {
pxhegvx_factorized<std::complex<double>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz,
descz, *info);
}

#endif
Loading
Loading