Miscellaneous docstring fixes and updates #944

Merged: 10 commits, Sep 13, 2023
102 changes: 68 additions & 34 deletions include/dlaf/eigensolver/bt_band_to_tridiag.h
@@ -19,40 +19,40 @@

namespace dlaf::eigensolver {

// Eigenvalue back-transformation implementation on local memory, which applies the inverse of the
// transformation used to get a tridiagonal matrix from a band one.
//
// It computes E -= V T V* E, applying to a general matrix E the inverse of the transformation described
// by the reflectors in V (block-wise, so T represents the T factor which embeds the information about
// taus), which are the ones used to transform a band matrix to a tridiagonal matrix.
//
// In particular, V and T are obtained using data about reflectors and taus passed via @p mat_hh
// where they are stored using following compact representation
//
// compact extended
// AT BT CT DT 1 0 0 0
// A1 B1 C1 D1 A1 1 0 0
// A2 B2 C2 D2 A2 B1 1 0
// A3 B3 C3 D3 A3 B2 C1 1
// 0 B3 C2 D1
// 0 0 C3 D2
// 0 0 0 D3
//
// where A, B, C and D refers to distinct reflectors, with their components numbered and their taus
// identified by the letter T.
//
// @param mat_hh matrix containing reflectors together with taus (compact form see representation above)
// @param mat_e matrix to which the inverse transformation is applied to
// @param band_size size of the reflectors (normal one, not constrained by any matrix size limit)
// @pre mat_hh has a square size
// @pre mat_hh has a square block size
// @pre mat_e and mat_hh share the same number of rows
// @pre mat_e block size and mat_hh block size share the same number of rows
// @pre band_size is a divisor of mat_hh.blockSize().cols()
// @pre mat_e is not distributed
// @pre mat_hh is not distributed
// @pre mat_e has equal tile and block sizes
// @pre mat_hh has equal tile and block sizes
/// Eigenvalue back-transformation implementation on local memory, which applies the inverse of the
/// transformation used to get a tridiagonal matrix from a band one.
///
/// It computes E -= V T V* E, applying to a general matrix E the inverse of the transformation described
/// by the reflectors in V (block-wise, so T represents the T factor which embeds the information about
/// taus), which are the ones used to transform a band matrix to a tridiagonal matrix.
///
/// In particular, V and T are obtained using data about reflectors and taus passed via @p mat_hh
/// where they are stored using the following compact representation
///
/// compact extended
/// AT BT CT DT 1 0 0 0
/// A1 B1 C1 D1 A1 1 0 0
/// A2 B2 C2 D2 A2 B1 1 0
/// A3 B3 C3 D3 A3 B2 C1 1
/// 0 B3 C2 D1
/// 0 0 C3 D2
/// 0 0 0 D3
///
/// where A, B, C and D refer to distinct reflectors, with their components numbered and their taus
/// identified by the letter T.
///
/// @param mat_hh matrix containing reflectors together with taus (compact form, see representation above)
/// @param mat_e matrix to which the inverse transformation is applied
/// @param band_size size of the reflectors (normal one, not constrained by any matrix size limit)
/// @pre mat_hh has a square size
/// @pre mat_hh has a square block size
/// @pre mat_e and mat_hh share the same number of rows
/// @pre mat_e block size and mat_hh block size share the same number of rows
/// @pre band_size is a divisor of mat_hh.blockSize().cols()
/// @pre mat_e is not distributed
/// @pre mat_hh is not distributed
/// @pre mat_e has equal tile and block sizes
/// @pre mat_hh has equal tile and block sizes
template <Backend B, Device D, class T>
void backTransformationBandToTridiag(const SizeType band_size, matrix::Matrix<T, D>& mat_e,
matrix::Matrix<const T, Device::CPU>& mat_hh) {
@@ -74,6 +74,40 @@ void backTransformationBandToTridiag(const SizeType band_size, matrix::Matrix<T,
internal::BackTransformationT2B<B, D, T>::call(band_size, mat_e, mat_hh);
}
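For reference, a minimal usage sketch of the local overload above. The wrapper name is illustrative (not a DLA-Future API), and `mat_e`/`mat_hh` are assumed to already satisfy the listed preconditions (local matrices, matching block sizes, `band_size` dividing `mat_hh.blockSize().cols()`).

```cpp
#include <dlaf/eigensolver/bt_band_to_tridiag.h>

// Illustrative wrapper: applies E -= V T V* E using the reflectors stored compactly
// in mat_hh. Backend and device are fixed to MC/CPU here; Device and T are deduced.
template <class T>
void apply_bt_band_to_tridiag(dlaf::SizeType band_size,
                              dlaf::matrix::Matrix<T, dlaf::Device::CPU>& mat_e,
                              dlaf::matrix::Matrix<const T, dlaf::Device::CPU>& mat_hh) {
  dlaf::eigensolver::backTransformationBandToTridiag<dlaf::Backend::MC>(band_size, mat_e, mat_hh);
}
```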

/// Eigenvalue back-transformation implementation, which applies the inverse of the transformation used
/// to get a tridiagonal matrix from a band one.
///
/// It computes E -= V T V* E, applying to a general matrix E the inverse of the transformation described
/// by the reflectors in V (block-wise, so T represents the T factor which embeds the information about
/// taus), which are the ones used to transform a band matrix to a tridiagonal matrix.
///
/// In particular, V and T are obtained using data about reflectors and taus passed via @p mat_hh
/// where they are stored using the following compact representation
///
/// compact extended
/// AT BT CT DT 1 0 0 0
/// A1 B1 C1 D1 A1 1 0 0
/// A2 B2 C2 D2 A2 B1 1 0
/// A3 B3 C3 D3 A3 B2 C1 1
/// 0 B3 C2 D1
/// 0 0 C3 D2
/// 0 0 0 D3
///
/// where A, B, C and D refer to distinct reflectors, with their components numbered and their taus
/// identified by the letter T.
///
/// @param mat_hh matrix containing reflectors together with taus (compact form, see representation above)
/// @param mat_e matrix to which the inverse transformation is applied
/// @param band_size size of the reflectors (normal one, not constrained by any matrix size limit)
/// @pre mat_hh has a square size
/// @pre mat_hh has a square block size
/// @pre mat_e and mat_hh share the same number of rows
/// @pre mat_e block size and mat_hh block size share the same number of rows
/// @pre band_size is a divisor of mat_hh.blockSize().cols()
/// @pre mat_e is distributed according to grid
/// @pre mat_hh is distributed according to grid
/// @pre mat_e has equal tile and block sizes
/// @pre mat_hh has equal tile and block sizes
template <Backend B, Device D, class T>
void backTransformationBandToTridiag(comm::CommunicatorGrid grid, const SizeType band_size,
matrix::Matrix<T, D>& mat_e,
Expand Down
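The distributed overload is used the same way with the communicator grid prepended. A sketch, assuming the trailing `mat_hh` parameter mirrors the local overload (the full signature is collapsed above); `mat_e` and `mat_hh` are assumed to be distributed according to `grid` as the preconditions require.

```cpp
#include <dlaf/eigensolver/bt_band_to_tridiag.h>

// Illustrative wrapper for the distributed overload (parameter order assumed from the
// visible part of the signature: grid, band_size, mat_e, mat_hh).
template <class T>
void apply_bt_band_to_tridiag_dist(dlaf::comm::CommunicatorGrid grid, dlaf::SizeType band_size,
                                   dlaf::matrix::Matrix<T, dlaf::Device::CPU>& mat_e,
                                   dlaf::matrix::Matrix<const T, dlaf::Device::CPU>& mat_hh) {
  dlaf::eigensolver::backTransformationBandToTridiag<dlaf::Backend::MC>(grid, band_size, mat_e,
                                                                        mat_hh);
}
```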
26 changes: 26 additions & 0 deletions include/dlaf/eigensolver/eigensolver.h
@@ -35,6 +35,15 @@ namespace dlaf::eigensolver {
/// @param mat contains the Hermitian matrix A
/// @param eigenvalues is a N x 1 matrix which on output contains the eigenvalues
/// @param eigenvectors is a N x N matrix which on output contains the eigenvectors
/// @pre mat is not distributed
/// @pre mat has a square size
/// @pre mat has a square blocksize
/// @pre mat has equal tile and block sizes
/// @pre eigenvalues is not distributed
/// @pre eigenvalues has equal tile and block sizes
/// @pre eigenvectors is not distributed
/// @pre eigenvectors has a square blocksize
/// @pre eigenvectors has equal tile and block sizes
template <Backend B, Device D, class T>
void eigensolver(blas::Uplo uplo, Matrix<T, D>& mat, Matrix<BaseType<T>, D>& eigenvalues,
Matrix<T, D>& eigenvectors) {
@@ -69,6 +78,10 @@ void eigensolver(blas::Uplo uplo, Matrix<T, D>& mat, Matrix<BaseType<T>, D>& eig
/// @return struct EigensolverResult with eigenvalues as a vector<T> and eigenvectors as a Matrix
/// @param uplo specifies if upper or lower triangular part of @p mat will be referenced
/// @param mat contains the Hermitian matrix A
/// @pre mat is not distributed
/// @pre mat has a square size
/// @pre mat has a square blocksize
/// @pre mat has equal tile and block sizes
template <Backend B, Device D, class T>
EigensolverResult<T, D> eigensolver(blas::Uplo uplo, Matrix<T, D>& mat) {
const SizeType size = mat.size().rows();
@@ -95,6 +108,15 @@ EigensolverResult<T, D> eigensolver(blas::Uplo uplo, Matrix<T, D>& mat) {
/// @param mat contains the Hermitian matrix A
/// @param eigenvalues is a N x 1 matrix which on output contains the eigenvalues
/// @param eigenvectors is a N x N matrix which on output contains the eigenvectors
/// @pre mat is distributed according to grid
/// @pre mat has a square size
/// @pre mat has a square blocksize
/// @pre mat has equal tile and block sizes
/// @pre eigenvalues is not distributed
/// @pre eigenvalues has equal tile and block sizes
/// @pre eigenvectors is distributed according to grid
/// @pre eigenvectors has a square blocksize
/// @pre eigenvectors has equal tile and block sizes
template <Backend B, Device D, class T>
void eigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>& mat,
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors) {
@@ -130,6 +152,10 @@ void eigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>& mat
/// @param grid is the communicator grid on which the matrix @p mat has been distributed,
/// @param uplo specifies if upper or lower triangular part of @p mat will be referenced
/// @param mat contains the Hermitian matrix A
/// @pre mat is distributed according to grid
/// @pre mat has a square size
/// @pre mat has a square blocksize
/// @pre mat has equal tile and block sizes
template <Backend B, Device D, class T>
EigensolverResult<T, D> eigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>& mat) {
const SizeType size = mat.size().rows();
Expand Down
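For context, minimal usage sketches for two of the overloads documented above: the local in-place form and the distributed form returning the result struct. Wrapper names are illustrative; the matrices are assumed to be preallocated according to the listed preconditions, and only the lower triangle of `mat` is referenced in these sketches.

```cpp
#include <dlaf/eigensolver/eigensolver.h>

// Local overload: eigenvalues (N x 1) and eigenvectors (N x N) are written in place.
template <class T>
void solve_hermitian(dlaf::matrix::Matrix<T, dlaf::Device::CPU>& mat,
                     dlaf::matrix::Matrix<dlaf::BaseType<T>, dlaf::Device::CPU>& eigenvalues,
                     dlaf::matrix::Matrix<T, dlaf::Device::CPU>& eigenvectors) {
  dlaf::eigensolver::eigensolver<dlaf::Backend::MC>(blas::Uplo::Lower, mat, eigenvalues,
                                                    eigenvectors);
}

// Distributed overload returning eigenvalues and eigenvectors packed in the result struct;
// mat must be distributed according to grid.
template <class T>
auto solve_hermitian_distributed(dlaf::comm::CommunicatorGrid grid,
                                 dlaf::matrix::Matrix<T, dlaf::Device::CPU>& mat) {
  return dlaf::eigensolver::eigensolver<dlaf::Backend::MC>(grid, blas::Uplo::Lower, mat);
}
```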
42 changes: 42 additions & 0 deletions include/dlaf/eigensolver/gen_eigensolver.h
@@ -38,6 +38,19 @@ namespace dlaf::eigensolver {
/// @param mat_b contains the Hermitian positive definite matrix B
/// @param eigenvalues is a N x 1 matrix which on output contains the eigenvalues
/// @param eigenvectors is a N x N matrix which on output contains the eigenvectors
/// @pre mat_a is not distributed
/// @pre mat_a has a square size
/// @pre mat_a has a square blocksize
/// @pre mat_a has equal tile and block sizes
/// @pre mat_b is not distributed
/// @pre mat_b has a square size
/// @pre mat_b has a square blocksize
/// @pre mat_b has equal tile and block sizes
/// @pre eigenvalues is not distributed
/// @pre eigenvalues has equal tile and block sizes
/// @pre eigenvectors is not distributed
/// @pre eigenvectors has a square blocksize
/// @pre eigenvectors has equal tile and block sizes
template <Backend B, Device D, class T>
void genEigensolver(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<T, D>& mat_b,
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors) {
@@ -81,6 +94,14 @@ void genEigensolver(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<T, D>& mat_b,
/// @param uplo specifies if upper or lower triangular part of @p mat_a and @p mat_b will be referenced
/// @param mat_a contains the Hermitian matrix A
/// @param mat_b contains the Hermitian positive definite matrix B
/// @pre mat_a is not distributed
/// @pre mat_a has a square size
/// @pre mat_a has a square blocksize
/// @pre mat_a has equal tile and block sizes
/// @pre mat_b is not distributed
/// @pre mat_b has a square size
/// @pre mat_b has a square blocksize
/// @pre mat_b has equal tile and block sizes
template <Backend B, Device D, class T>
EigensolverResult<T, D> genEigensolver(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<T, D>& mat_b) {
DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a);
@@ -122,6 +143,19 @@ EigensolverResult<T, D> genEigensolver(blas::Uplo uplo, Matrix<T, D>& mat_a, Mat
/// @param mat_b contains the Hermitian positive definite matrix B
/// @param eigenvalues is a N x 1 matrix which on output contains the eigenvalues
/// @param eigenvectors is a N x N matrix which on output contains the eigenvectors
/// @pre mat_a is distributed according to grid
/// @pre mat_a has a square size
/// @pre mat_a has a square blocksize
/// @pre mat_a has equal tile and block sizes
/// @pre mat_b is distributed according to grid
/// @pre mat_b has a square size
/// @pre mat_b has a square blocksize
/// @pre mat_b has equal tile and block sizes
/// @pre eigenvalues is not distributed
/// @pre eigenvalues has equal tile and block sizes
/// @pre eigenvectors is distributed according to grid
/// @pre eigenvectors has a square blocksize
/// @pre eigenvectors has equal tile and block sizes
template <Backend B, Device D, class T>
void genEigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>& mat_a,
Matrix<T, D>& mat_b, Matrix<BaseType<T>, D>& eigenvalues,
@@ -167,6 +201,14 @@ void genEigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>&
/// @param uplo specifies if upper or lower triangular part of @p mat_a and @p mat_b will be referenced
/// @param mat_a contains the Hermitian matrix A
/// @param mat_b contains the Hermitian positive definite matrix B
/// @pre mat_a is distributed according to grid
/// @pre mat_a has a square size
/// @pre mat_a has a square blocksize
/// @pre mat_a has equal tile and block sizes
/// @pre mat_b is distributed according to grid
/// @pre mat_b has a square size
/// @pre mat_b has a square blocksize
/// @pre mat_b has equal tile and block sizes
template <Backend B, Device D, class T>
EigensolverResult<T, D> genEigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>& mat_a,
Matrix<T, D>& mat_b) {
Expand Down
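The generalized solver follows the same pattern with the additional matrix B. A hedged sketch of the local in-place overload (wrapper name illustrative; inputs assumed to satisfy the preconditions above, with B Hermitian positive definite):

```cpp
#include <dlaf/eigensolver/gen_eigensolver.h>

// Illustrative wrapper: solves A x = lambda B x for local matrices, referencing only
// the lower triangles of mat_a and mat_b.
template <class T>
void solve_generalized(dlaf::matrix::Matrix<T, dlaf::Device::CPU>& mat_a,
                       dlaf::matrix::Matrix<T, dlaf::Device::CPU>& mat_b,
                       dlaf::matrix::Matrix<dlaf::BaseType<T>, dlaf::Device::CPU>& eigenvalues,
                       dlaf::matrix::Matrix<T, dlaf::Device::CPU>& eigenvectors) {
  dlaf::eigensolver::genEigensolver<dlaf::Backend::MC>(blas::Uplo::Lower, mat_a, mat_b,
                                                       eigenvalues, eigenvectors);
}
```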
13 changes: 5 additions & 8 deletions include/dlaf/eigensolver/tridiag_solver.h
@@ -26,10 +26,9 @@ namespace eigensolver {
/// @param tridiag [in/out] (n x 2) local matrix with the diagonal and off-diagonal of the symmetric
/// tridiagonal matrix in the first and second columns, respectively. The last entry
/// of the second column is not used.
/// @param evals [out] (n x 1) local matrix holding the eigenvalues of the the symmetric tridiagonal
/// matrix
/// @param evecs [out] (n x n) local matrix holding the eigenvectors of the the symmetric tridiagonal
/// matrix on exit.
/// @param evals [out] (n x 1) local matrix holding the eigenvalues of the symmetric tridiagonal matrix
/// @param evecs [out] (n x n) local matrix holding the eigenvectors of the symmetric tridiagonal matrix
/// on exit.
///
/// @pre @p tridiag, @p evals and @p evecs are local matrices
/// @pre tridiag has 2 columns and column block size of 2
@@ -75,10 +74,8 @@ void tridiagSolver(Matrix<BaseType<T>, Device::CPU>& tridiag, Matrix<BaseType<T>
/// @param tridiag [in/out] (n x 2) local matrix with the diagonal and off-diagonal of the symmetric
/// tridiagonal matrix in the first and second columns, respectively. The last entry
/// of the second column is not used.
/// @param evals [out] (n x 1) local matrix holding the eigenvalues of the the symmetric tridiagonal
/// matrix
/// @param evecs [out] (n x n) distributed matrix holding the eigenvectors of the the symmetric
/// tridiagonal
/// @param evals [out] (n x 1) local matrix holding the eigenvalues of the symmetric tridiagonal matrix
/// @param evecs [out] (n x n) distributed matrix holding the eigenvectors of the symmetric tridiagonal
/// matrix on exit.
///
/// @pre @p tridiag and @p evals are local matrices and are the same on all ranks
Expand Down
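A sketch of calling the local tridiagonal solver, assuming the template parameters follow the same <Backend, Device, element type> pattern as the other solvers (the full signature is collapsed above). The shapes follow the parameter descriptions: `tridiag` is (n x 2) with the diagonal in column 0 and the off-diagonal in column 1, `evals` is (n x 1), `evecs` is (n x n).

```cpp
#include <dlaf/eigensolver/tridiag_solver.h>

// Illustrative wrapper (assumed template parameter list): solves the symmetric
// tridiagonal eigenproblem on local matrices.
template <class T>
void solve_tridiagonal(dlaf::matrix::Matrix<dlaf::BaseType<T>, dlaf::Device::CPU>& tridiag,
                       dlaf::matrix::Matrix<dlaf::BaseType<T>, dlaf::Device::CPU>& evals,
                       dlaf::matrix::Matrix<T, dlaf::Device::CPU>& evecs) {
  dlaf::eigensolver::tridiagSolver<dlaf::Backend::MC>(tridiag, evals, evecs);
}
```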
4 changes: 2 additions & 2 deletions include/dlaf/eigensolver/tridiag_solver/merge.h
@@ -388,8 +388,8 @@ std::vector<GivensRotation<T>> applyDeflationToArrays(T rho, T tol, const SizeTy
d2 = tmp;

rots.push_back(GivensRotation<T>{i1s, i2s, c, s});
// Set the the `i1` column as "Dense" if the `i2` column has opposite non-zero structure (i.e if
// one comes from Q1 and the other from Q2 or vice-versa)
// Set the `i1` column as "Dense" if the `i2` column has opposite non-zero structure (i.e if one
// comes from Q1 and the other from Q2 or vice-versa)
if ((c1 == ColType::UpperHalf && c2 == ColType::LowerHalf) ||
(c1 == ColType::LowerHalf && c2 == ColType::UpperHalf)) {
c1 = ColType::Dense;
Expand Down
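The rule in the fixed comment can be summarized in a tiny stand-alone sketch. The enum and function below are purely illustrative redefinitions (not the library's), and the "otherwise unchanged" branch is an assumption since the rest of the loop body is collapsed above.

```cpp
// Illustrative, local redefinition of the column classification used above.
enum class ColType { UpperHalf, LowerHalf, Dense };

// After rotating columns i1 and i2, column i1 loses its half-structured sparsity
// whenever the two columns come from opposite halves (one from Q1, one from Q2);
// otherwise it is assumed to keep its type.
inline ColType col_type_after_rotation(ColType c1, ColType c2) {
  const bool opposite = (c1 == ColType::UpperHalf && c2 == ColType::LowerHalf) ||
                        (c1 == ColType::LowerHalf && c2 == ColType::UpperHalf);
  return opposite ? ColType::Dense : c1;
}
```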
4 changes: 2 additions & 2 deletions include/dlaf/eigensolver/tridiag_solver/rot.h
@@ -165,12 +165,12 @@ void applyGivensRotationsToMatrixColumns(const SizeType i_begin, const SizeType
common::internal::SingleThreadedBlasScope single;

for (const GivensRotation<T>& rot : rots) {
// Get the index of the tile that has column `rot.i` and the the index of the column within the tile.
// Get the index of the tile that has column `rot.i` and the index of the column within the tile.
const SizeType i_tile = distr.globalTileLinearIndex(GlobalElementIndex(0, rot.i));
const SizeType i_el = distr.tileElementFromGlobalElement<Coord::Col>(rot.i);
T* x = tiles[to_sizet(i_tile)].ptr(TileElementIndex(0, i_el));

// Get the index of the tile that has column `rot.j` and the the index of the column within the tile.
// Get the index of the tile that has column `rot.j` and the index of the column within the tile.
const SizeType j_tile = distr.globalTileLinearIndex(GlobalElementIndex(0, rot.j));
const SizeType j_el = distr.tileElementFromGlobalElement<Coord::Col>(rot.j);
T* y = tiles[to_sizet(j_tile)].ptr(TileElementIndex(0, j_el));
Expand Down
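For reference, the effect of one Givens rotation (c, s) on the two resolved columns, written as a plain loop; this only illustrates the arithmetic and is independent of how the library applies it to the tile data (which is collapsed above).

```cpp
#include <cstddef>

// Apply the Givens rotation [c s; -s c] to two length-n columns x and y with unit stride.
template <class T>
void apply_givens_rotation(std::size_t n, T* x, T* y, T c, T s) {
  for (std::size_t k = 0; k < n; ++k) {
    const T xk = x[k];
    const T yk = y[k];
    x[k] = c * xk + s * yk;
    y[k] = c * yk - s * xk;
  }
}
```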
22 changes: 20 additions & 2 deletions include/dlaf/permutations/general.h
@@ -34,7 +34,16 @@ namespace dlaf::permutations {
/// the range [i_begin,i_end) are accessed in read-only mode.
/// @param mat_out is the output matrix. Only tiles whose row and col tile coords are both in
/// the range [i_begin,i_end) are accessed in write-only mode.
///
/// @pre perms is not distributed
/// @pre perms has equal tile and block sizes
/// @pre mat_in is not distributed
/// @pre mat_in has equal tile and block sizes
/// @pre mat_in has a square size
/// @pre mat_in has a square blocksize
/// @pre mat_out is not distributed
/// @pre mat_out has equal tile and block sizes
/// @pre mat_out has a square size
/// @pre mat_out has a square blocksize
template <Backend B, Device D, class T, Coord coord>
void permute(SizeType i_begin, SizeType i_end, Matrix<const SizeType, D>& perms,
Matrix<const T, D>& mat_in, Matrix<T, D>& mat_out) {
@@ -83,6 +92,16 @@ void permute(SizeType i_begin, SizeType i_end, Matrix<const SizeType, D>& perms,
/// the range [i_begin,i_end) are accessed in readwrite-mode.
/// @param mat_out is the distributed output matrix. Only tiles whose global row and col tile coords are both in
/// the range [i_begin,i_end) are accessed in readwrite-mode.
/// @pre perms is not distributed
/// @pre perms has equal tile and block sizes
/// @pre mat_in is distributed according to grid
/// @pre mat_in has equal tile and block sizes
/// @pre mat_in has a square size
/// @pre mat_in has a square blocksize
/// @pre mat_out is distributed according to grid
/// @pre mat_out has equal tile and block sizes
/// @pre mat_out has a square size
/// @pre mat_out has a square blocksize
///
/// Note: The Pipeline<> API allows permute() to be used within other algorithms without having to clone
/// communicators internally.
@@ -122,7 +141,6 @@ void permute(comm::CommunicatorGrid grid, common::Pipeline<comm::Communicator>&
///
/// This overload clones the row communicator (if Coord::Col) or column communicator (if Coord::Row) of
/// @p grid internally.
///
template <Backend B, Device D, class T, Coord coord>
void permute(comm::CommunicatorGrid grid, SizeType i_begin, SizeType i_end,
Matrix<const SizeType, D>& perms, Matrix<const T, D>& mat_in, Matrix<T, D>& mat_out) {
Expand Down
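A minimal usage sketch of the local overload documented above, permuting columns on the tile sub-range [i_begin, i_end); the wrapper name is illustrative and all three matrices are assumed to satisfy the listed preconditions.

```cpp
#include <dlaf/permutations/general.h>

// Illustrative wrapper: apply the column permutation in perms to mat_in, writing mat_out.
template <class T>
void permute_columns(dlaf::SizeType i_begin, dlaf::SizeType i_end,
                     dlaf::matrix::Matrix<const dlaf::SizeType, dlaf::Device::CPU>& perms,
                     dlaf::matrix::Matrix<const T, dlaf::Device::CPU>& mat_in,
                     dlaf::matrix::Matrix<T, dlaf::Device::CPU>& mat_out) {
  dlaf::permutations::permute<dlaf::Backend::MC, dlaf::Device::CPU, T, dlaf::Coord::Col>(
      i_begin, i_end, perms, mat_in, mat_out);
}
```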
2 changes: 1 addition & 1 deletion include/dlaf/sender/transform_mpi.h
@@ -34,7 +34,7 @@ void consumeCommunicatorWrapper(T&) {}
///
/// Wrapper type around calls to MPI functions. Provides a call operator that
/// creates an MPI request and passes it as the last argument to the provided
/// callable. The wrapper then waits for the the request to complete with
/// callable. The wrapper then waits for the request to complete with
/// yield_while.
///
/// This could in theory be a lambda inside transformMPI. However, clang at
Expand Down
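A minimal, library-independent sketch of the pattern described above: the callable receives an MPI_Request as its last argument, and completion is then polled rather than blocked on. The helper name is illustrative; in DLA-Future the polling is driven by yield_while rather than the busy loop shown here.

```cpp
#include <mpi.h>
#include <utility>

// Illustrative helper: invoke a nonblocking MPI call that takes an MPI_Request* as its
// last argument, then poll the request until it completes.
template <class F, class... Args>
void call_mpi_and_poll(F&& f, Args&&... args) {
  MPI_Request req = MPI_REQUEST_NULL;
  std::forward<F>(f)(std::forward<Args>(args)..., &req);  // e.g. MPI_Ibarrier(comm, &req)
  int completed = 0;
  while (!completed) {
    MPI_Test(&req, &completed, MPI_STATUS_IGNORE);  // a real implementation would yield here
  }
}
```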