From ee10c84d9fef7811f830a7aa9e3b5a2701088d6f Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Mon, 29 Jan 2024 21:59:20 -0800 Subject: [PATCH] Add HypreCSRMatrix wrapper class to replace mfem::SparseMatrix This allows OpenMP and better GPU acceleration for ceed::Operator full assembly into a CSR matrix using Hypre's functionality. --- palace/fem/bilinearform.cpp | 4 +- palace/fem/bilinearform.hpp | 17 +- palace/fem/libceed/operator.cpp | 284 +++++++++++++---------------- palace/fem/libceed/operator.hpp | 14 +- palace/linalg/divfree.cpp | 5 +- palace/linalg/rap.cpp | 217 +++++++++++----------- palace/linalg/rap.hpp | 14 +- palace/models/curlcurloperator.cpp | 5 +- palace/models/laplaceoperator.cpp | 5 +- palace/models/spaceoperator.cpp | 7 +- test/unit/test-libceed.cpp | 106 +++++++---- 11 files changed, 338 insertions(+), 340 deletions(-) diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp index 0463c8ed5..307ffa784 100644 --- a/palace/fem/bilinearform.cpp +++ b/palace/fem/bilinearform.cpp @@ -113,8 +113,8 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace, return op; } -std::unique_ptr BilinearForm::FullAssemble(const ceed::Operator &op, - bool skip_zeros, bool set) +std::unique_ptr BilinearForm::FullAssemble(const ceed::Operator &op, + bool skip_zeros, bool set) { return ceed::CeedOperatorFullAssemble(op, skip_zeros, set); } diff --git a/palace/fem/bilinearform.hpp b/palace/fem/bilinearform.hpp index 9059785e3..fef50f91f 100644 --- a/palace/fem/bilinearform.hpp +++ b/palace/fem/bilinearform.hpp @@ -9,6 +9,7 @@ #include #include "fem/integrator.hpp" #include "fem/libceed/operator.hpp" +#include "linalg/hypre.hpp" namespace palace { @@ -69,19 +70,19 @@ class BilinearForm return PartialAssemble(GetTrialSpace(), GetTestSpace()); } - std::unique_ptr FullAssemble(bool skip_zeros) const + std::unique_ptr FullAssemble(bool skip_zeros) const { return FullAssemble(*PartialAssemble(), skip_zeros, false); } - static std::unique_ptr FullAssemble(const ceed::Operator &op, - bool skip_zeros) + static std::unique_ptr FullAssemble(const ceed::Operator &op, + bool skip_zeros) { return FullAssemble(op, skip_zeros, false); } - static std::unique_ptr FullAssemble(const ceed::Operator &op, - bool skip_zeros, bool set); + static std::unique_ptr FullAssemble(const ceed::Operator &op, + bool skip_zeros, bool set); std::unique_ptr Assemble(bool skip_zeros) const; @@ -120,13 +121,13 @@ class DiscreteLinearOperator std::unique_ptr PartialAssemble() const; - std::unique_ptr FullAssemble(bool skip_zeros) const + std::unique_ptr FullAssemble(bool skip_zeros) const { return BilinearForm::FullAssemble(*PartialAssemble(), skip_zeros, true); } - static std::unique_ptr FullAssemble(const ceed::Operator &op, - bool skip_zeros) + static std::unique_ptr FullAssemble(const ceed::Operator &op, + bool skip_zeros) { return BilinearForm::FullAssemble(op, skip_zeros, true); } diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp index df642e94e..da0fe2a70 100644 --- a/palace/fem/libceed/operator.cpp +++ b/palace/fem/libceed/operator.cpp @@ -5,8 +5,10 @@ #include #include +#include #include #include "fem/fespace.hpp" +#include "linalg/hypre.hpp" #include "utils/omp.hpp" namespace palace::ceed @@ -211,174 +213,71 @@ int CeedInternalFree(void *p) #define CeedInternalCalloc(n, p) CeedInternalCallocArray((n), sizeof(**(p)), p) -void CeedOperatorAssembleCOORemoveZeros(Ceed ceed, CeedSize *nnz, CeedInt **rows, - CeedInt **cols, CeedVector *vals, CeedMemType *mem) -{ 
- // Filter out zero entries. For now, eliminating zeros happens all on the host. - // XX TODO: Use Thrust for this (thrust::copy_if and thrust::zip_iterator) - CeedInt *new_rows, *new_cols; - PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, &new_rows)); - PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, &new_cols)); - - CeedVector new_vals; - PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, &new_vals)); - - CeedSize q = 0; - const CeedScalar *vals_array; - CeedScalar *new_vals_array; - PalaceCeedCall(ceed, CeedVectorGetArrayRead(*vals, CEED_MEM_HOST, &vals_array)); - PalaceCeedCall(ceed, CeedVectorGetArrayWrite(new_vals, CEED_MEM_HOST, &new_vals_array)); - for (CeedSize k = 0; k < *nnz; k++) - { - if (vals_array[k] != 0.0) - { - new_rows[q] = (*rows)[k]; - new_cols[q] = (*cols)[k]; - new_vals_array[q] = vals_array[k]; - q++; - } - } - PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(*vals, &vals_array)); - PalaceCeedCall(ceed, CeedVectorRestoreArray(new_vals, &new_vals_array)); - - PalaceCeedCall(ceed, CeedInternalFree(rows)); - PalaceCeedCall(ceed, CeedInternalFree(cols)); - PalaceCeedCall(ceed, CeedVectorDestroy(vals)); - - *rows = new_rows; - *cols = new_cols; - *vals = new_vals; - *nnz = q; -} - -void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz, +void CeedOperatorAssembleCOO(Ceed ceed, CeedOperator op, bool skip_zeros, CeedSize *nnz, CeedInt **rows, CeedInt **cols, CeedVector *vals, CeedMemType *mem) { - const std::size_t nt = op.Size(); - if (nt == 0) - { - *nnz = 0; - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - *mem = CEED_MEM_HOST; - return; - } - - Ceed ceed; - CeedScalar *vals_array; - std::vector loc_nnz(nt), loc_offsets(nt + 1); - std::vector loc_rows(nt), loc_cols(nt); - std::vector loc_vals(nt); - - PalaceCeedCallBackend(CeedOperatorGetCeed(op[0], &ceed)); PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, mem)); - if (!mfem::Device::Allows(mfem::Backend::DEVICE_MASK)) - { - *mem = CEED_MEM_HOST; - } - - PalacePragmaOmp(parallel if (nt > 1)) - { - Ceed ceed; - const int id = utils::GetThreadNum(); - MFEM_ASSERT(id < op.Size() && op[id], - "Out of bounds access for thread number " << id << "!"); - PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed)); - // Assemble sparsity pattern (rows, cols are always host pointers). - PalaceCeedCall(ceed, CeedOperatorLinearAssembleSymbolic(op[id], &loc_nnz[id], - &loc_rows[id], &loc_cols[id])); + // Assemble sparsity pattern (rows, cols are always host pointers). + PalaceCeedCall(ceed, CeedOperatorLinearAssembleSymbolic(op, nnz, rows, cols)); - // Assemble values. - PalaceCeedCall(ceed, CeedVectorCreate(ceed, loc_nnz[id], &loc_vals[id])); - PalaceCeedCall(ceed, CeedOperatorLinearAssemble(op[id], loc_vals[id])); - } + // Assemble values. + PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, vals)); + PalaceCeedCall(ceed, CeedOperatorLinearAssemble(op, *vals)); - loc_offsets[0] = 0; - std::inclusive_scan(loc_nnz.begin(), loc_nnz.end(), loc_offsets.begin() + 1); - *nnz = loc_offsets.back(); - if (nt == 1) - { - // Assemble values. - *rows = loc_rows[0]; - *cols = loc_cols[0]; - *vals = loc_vals[0]; - } - else + // Filter out zero entries. For now, eliminating zeros happens all on the host. + // std::cout << " Operator full assembly (COO) has " << *nnz << " NNZ"; + if (skip_zeros && *nnz > 0) { - // Global assembly. 
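The XX TODO above names the intended device-side alternative: compact the (row, col, value) triplets with thrust::copy_if over a zip_iterator instead of the host loop. A minimal sketch of that approach, using plain int/double arrays in place of the CeedInt/CeedScalar types; the helper and its name are illustrative only and not part of this patch:

#include <cstddef>
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

// Predicate: keep a COO triplet only if its value is an explicit nonzero.
struct IsNonZero
{
  __host__ __device__ bool operator()(const thrust::tuple<int, int, double> &t) const
  {
    return thrust::get<2>(t) != 0.0;
  }
};

// rows, cols, vals are device pointers with nnz entries; the new_* arrays
// receive the compacted triplets. Returns the new number of nonzeros.
std::size_t CompactCOO(const int *rows, const int *cols, const double *vals,
                       std::size_t nnz, int *new_rows, int *new_cols, double *new_vals)
{
  auto in = thrust::make_zip_iterator(thrust::make_tuple(
      thrust::device_pointer_cast(rows), thrust::device_pointer_cast(cols),
      thrust::device_pointer_cast(vals)));
  auto out = thrust::make_zip_iterator(thrust::make_tuple(
      thrust::device_pointer_cast(new_rows), thrust::device_pointer_cast(new_cols),
      thrust::device_pointer_cast(new_vals)));
  return thrust::copy_if(in, in + nnz, out, IsNonZero()) - out;
}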
- PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, rows)); - PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, cols)); - PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, vals)); - PalaceCeedCall(ceed, CeedVectorGetArrayWrite(*vals, *mem, &vals_array)); + // XX TODO: Use Thrust for this (thrust::copy_if and thrust::zip_iterator) + CeedInt *new_rows, *new_cols; + PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, &new_rows)); + PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, &new_cols)); + + CeedVector new_vals; + PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, &new_vals)); - PalacePragmaOmp(parallel if (nt > 1)) + CeedSize q = 0; + const CeedScalar *vals_array; + CeedScalar *new_vals_array; + PalaceCeedCall(ceed, CeedVectorGetArrayRead(*vals, CEED_MEM_HOST, &vals_array)); + PalaceCeedCall(ceed, CeedVectorGetArrayWrite(new_vals, CEED_MEM_HOST, &new_vals_array)); + for (CeedSize k = 0; k < *nnz; k++) { - const int id = utils::GetThreadNum(); - MFEM_ASSERT(id < op.Size() && op[id], - "Out of bounds access for thread number " << id << "!"); - const auto start = loc_offsets[id]; - const auto end = loc_offsets[id + 1]; - for (auto k = start; k < end; k++) + if (vals_array[k] != 0.0) { - (*rows)[k] = loc_rows[id][k - start]; - (*cols)[k] = loc_cols[id][k - start]; + new_rows[q] = (*rows)[k]; + new_cols[q] = (*cols)[k]; + new_vals_array[q] = vals_array[k]; + q++; } - - // The CeedVector is on only on device when MFEM is also using the device. This is - // also correctly a non-OpenMP loop when executed on the CPU (OpenMP is handled in - // outer scope above). - Ceed ceed; - const CeedScalar *loc_vals_array; - PalaceCeedCallBackend(CeedVectorGetCeed(loc_vals[id], &ceed)); - PalaceCeedCall(ceed, CeedVectorGetArrayRead(loc_vals[id], *mem, &loc_vals_array)); - mfem::forall_switch(*mem == CEED_MEM_DEVICE, end - start, - [=] MFEM_HOST_DEVICE(int k) - { vals_array[k + start] = loc_vals_array[k]; }); - PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(loc_vals[id], &loc_vals_array)); - PalaceCeedCall(ceed, CeedInternalFree(&loc_rows[id])); - PalaceCeedCall(ceed, CeedInternalFree(&loc_cols[id])); - PalaceCeedCall(ceed, CeedVectorDestroy(&loc_vals[id])); } + PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(*vals, &vals_array)); + PalaceCeedCall(ceed, CeedVectorRestoreArray(new_vals, &new_vals_array)); - PalaceCeedCall(ceed, CeedVectorRestoreArray(*vals, &vals_array)); - } + PalaceCeedCall(ceed, CeedInternalFree(rows)); + PalaceCeedCall(ceed, CeedInternalFree(cols)); + PalaceCeedCall(ceed, CeedVectorDestroy(vals)); + + *nnz = q; + *rows = new_rows; + *cols = new_cols; + *vals = new_vals; - // std::cout << " Operator full assembly (COO) has " << *nnz << " NNZ"; - if (skip_zeros && *nnz > 0) - { - CeedOperatorAssembleCOORemoveZeros(ceed, nnz, rows, cols, vals, mem); // std::cout << " (new NNZ after removal: " << *nnz << ")"; } // std::cout << "\n"; } -} // namespace - -std::unique_ptr CeedOperatorFullAssemble(const Operator &op, - bool skip_zeros, bool set) +std::unique_ptr OperatorCOOtoCSR(Ceed ceed, CeedInt m, CeedInt n, + CeedSize nnz, CeedInt *rows, + CeedInt *cols, CeedVector vals, + CeedMemType mem, bool set) { - // First, get matrix on master thread in COO format, withs rows/cols always on host and - // vals potentially on the device. Process skipping zeros if desired. - CeedSize nnz; - CeedInt *rows, *cols; - CeedVector vals; - CeedMemType mem; - CeedOperatorAssembleCOO(op, skip_zeros, &nnz, &rows, &cols, &vals, &mem); - // Preallocate CSR memory on host (like PETSc's MatSetValuesCOO). 
- auto mat = std::make_unique();
- mat->OverrideSize(op.Height(), op.Width());
- mat->GetMemoryI().New(op.Height() + 1);
- auto *I = mat->GetI();
- mfem::Array J(nnz), perm(nnz), Jmap(nnz + 1);
-
- for (int i = 0; i < op.Height() + 1; i++)
- {
- I[i] = 0;
- }
+ mfem::Array I(m + 1), J(nnz), perm(nnz), Jmap(nnz + 1);
+ I = 0;
 for (int k = 0; k < nnz; k++)
 {
 perm[k] = k;
@@ -419,11 +318,13 @@ std::unique_ptr CeedOperatorFullAssemble(const Operator &op,
 }
 }
 }
- const int nnz_new = q + 1;
+ PalaceCeedCall(ceed, CeedInternalFree(&rows));
+ PalaceCeedCall(ceed, CeedInternalFree(&cols));

 // Finalize I, Jmap.
+ const int nnz_new = q + 1;
 I[0] = 0;
- for (int i = 0; i < op.Height(); i++)
+ for (int i = 0; i < m; i++)
 {
 I[i + 1] += I[i];
 }
@@ -433,23 +334,25 @@ std::unique_ptr CeedOperatorFullAssemble(const Operator &op,
 Jmap[k + 1] += Jmap[k];
 }

- mat->GetMemoryJ().New(nnz_new, mat->GetMemoryJ().GetMemoryType());
- mat->GetMemoryData().New(nnz_new, mat->GetMemoryJ().GetMemoryType());
+ // Construct and fill the final CSR matrix. On GPU, MFEM and Hypre share the same memory
+ // space. On CPU, the inner nested OpenMP loop (if enabled in MFEM) should be ignored.
+ auto mat = std::make_unique(m, n, nnz_new);
+ {
+ const auto *d_I_old = I.Read();
+ auto *d_I = mat->GetI();
+ mfem::forall(m + 1, [=] MFEM_HOST_DEVICE(int i) { d_I[i] = d_I_old[i]; });
+ }
 {
- // This always executes on the device.
 const auto *d_J_old = J.Read();
- auto *d_J = mfem::Write(mat->GetMemoryJ(), nnz_new);
+ auto *d_J = mat->GetJ();
 mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k) { d_J[k] = d_J_old[k]; });
 }
-
- // Fill the values (also always on device).
- if (vals)
 {
 auto FillValues = [&](const double *vals_array)
 {
 const auto *d_perm = perm.Read();
 const auto *d_Jmap = Jmap.Read();
- auto *d_A = mfem::Write(mat->GetMemoryData(), nnz_new);
+ auto *d_A = mat->GetData();
 if (set)
 {
 mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k)
@@ -477,7 +380,6 @@ std::unique_ptr CeedOperatorFullAssemble(const Operator &op,
 {
 // Copy values to device before filling.
 Vector d_vals(nnz);
- d_vals.UseDevice(true);
 {
 auto *d_vals_array = d_vals.HostWrite();
 PalacePragmaOmp(parallel for schedule(static))
@@ -495,8 +397,72 @@ std::unique_ptr CeedOperatorFullAssemble(const Operator &op,
 }
 PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(vals, &vals_array));
 PalaceCeedCall(ceed, CeedVectorDestroy(&vals));
- PalaceCeedCall(ceed, CeedInternalFree(&rows));
- PalaceCeedCall(ceed, CeedInternalFree(&cols));
+ }
+
+ return mat;
+}
+
+} // namespace
+
+std::unique_ptr CeedOperatorFullAssemble(const Operator &op,
+ bool skip_zeros, bool set)
+{
+ if (op.Size() == 0)
+ {
+ return std::make_unique(op.Height(), op.Width(), 0);
+ }
+
+ // Assemble operators on each thread.
+ std::vector> loc_mat(op.Size());
+ PalacePragmaOmp(parallel if (op.Size() > 1))
+ {
+ Ceed ceed;
+ const int id = utils::GetThreadNum();
+ MFEM_ASSERT(id < op.Size() && op[id],
+ "Out of bounds access for thread number " << id << "!");
+ PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
+
+ // First, get the matrix on each thread in COO format, with rows/cols always on host
+ // and vals potentially on the device. Zeros are filtered out here if requested.
+ CeedSize nnz;
+ CeedInt *rows, *cols;
+ CeedVector vals;
+ CeedMemType mem;
+ CeedOperatorAssembleCOO(ceed, op[id], skip_zeros, &nnz, &rows, &cols, &vals, &mem);
+
+ // Convert COO to CSR (on each thread); see the serial sketch below. The COO memory is
+ // freed internally.
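For orientation, the sort-and-merge conversion performed by OperatorCOOtoCSR, reduced to a serial, host-only sketch with standard containers (the patch builds I, J, a sorting permutation, and the duplicate map Jmap with mfem::Array and fills values on the device; the duplicate handling here only approximates the set/add behavior, and this stand-in is illustrative, not the patch's code):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

struct CSRMatrix
{
  std::vector<int> I, J;
  std::vector<double> A;
};

CSRMatrix COOtoCSR(int m, const std::vector<int> &rows, const std::vector<int> &cols,
                   const std::vector<double> &vals, bool set)
{
  // Sort a permutation of the entry indices by (row, col) instead of the data.
  std::vector<int> perm(vals.size());
  std::iota(perm.begin(), perm.end(), 0);
  std::sort(perm.begin(), perm.end(), [&](int a, int b)
            { return std::make_pair(rows[a], cols[a]) < std::make_pair(rows[b], cols[b]); });

  CSRMatrix mat;
  mat.I.assign(m + 1, 0);
  for (std::size_t k = 0; k < perm.size(); k++)
  {
    const int i = rows[perm[k]], j = cols[perm[k]];
    if (k > 0 && i == rows[perm[k - 1]] && j == cols[perm[k - 1]])
    {
      // Duplicate (i, j) entry: accumulate, or keep a single value when set = true.
      if (!set)
      {
        mat.A.back() += vals[perm[k]];
      }
      continue;
    }
    mat.J.push_back(j);
    mat.A.push_back(vals[perm[k]]);
    mat.I[i + 1]++;  // Row counts, turned into row offsets by the scan below
  }
  std::partial_sum(mat.I.begin(), mat.I.end(), mat.I.begin());
  return mat;
}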
+ loc_mat[id] = + OperatorCOOtoCSR(ceed, op.Height(), op.Width(), nnz, rows, cols, vals, mem, set); + } + + // Add CSR matrix objects from each thread (HYPRE's hypre_CSRMatrixAdd uses threads + // internally as available). We have to scale the duplicated nonzeros when set = true. + auto mat = std::move(loc_mat[0]); + std::unique_ptr b_mat; + if (set && op.Size() > 1) + { + b_mat = std::make_unique(hypre_CSRMatrixClone(*mat, 0)); + hypre_CSRMatrixSetConstantValues(*b_mat, 1.0); + for (std::size_t i = 1; i < op.Size(); i++) + { + hypre_CSRMatrix *b_loc_mat = hypre_CSRMatrixClone(*loc_mat[i], 0); + hypre_CSRMatrixSetConstantValues(b_loc_mat, 1.0); + b_mat = std::make_unique( + hypre_CSRMatrixAdd(1.0, *b_mat, 1.0, b_loc_mat)); + hypre_CSRMatrixDestroy(b_loc_mat); + } + } + for (std::size_t i = 1; i < op.Size(); i++) + { + mat = std::make_unique( + hypre_CSRMatrixAdd(1.0, *mat, 1.0, *loc_mat[i])); + } + if (set && op.Size() > 1) + { + const auto *d_b_data = b_mat->GetData(); + auto *d_data = mat->GetData(); + mfem::forall(mat->NNZ(), + [=] MFEM_HOST_DEVICE(int i) { d_data[i] *= 1.0 / d_b_data[i]; }); } return mat; diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp index 68dd90585..fe44d85ad 100644 --- a/palace/fem/libceed/operator.hpp +++ b/palace/fem/libceed/operator.hpp @@ -6,7 +6,6 @@ #include #include -#include #include "fem/libceed/ceed.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" @@ -16,6 +15,13 @@ namespace palace class FiniteElementSpace; +namespace hypre +{ + +class HypreCSRMatrix; + +} // namespace hypre + namespace ceed { @@ -67,9 +73,9 @@ class SymmetricOperator : public Operator } }; -// Assemble a ceed::Operator as an mfem::SparseMatrix. -std::unique_ptr CeedOperatorFullAssemble(const Operator &op, - bool skip_zeros, bool set); +// Assemble a ceed::Operator as a CSR matrix. +std::unique_ptr CeedOperatorFullAssemble(const Operator &op, + bool skip_zeros, bool set); // Construct a coarse-level ceed::Operator, reusing the quadrature data and quadrature // function from the fine-level operator. Only available for square, symmetric operators diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index af158c517..853a26d82 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -64,6 +64,10 @@ DivFreeSolver::DivFreeSolver( const auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); auto M_l = BuildLevelParOperator(std::move(m_vec[l]), h1_fespace_l); M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + if (l == h1_fespaces.GetNumLevels() - 1) + { + bdr_tdof_list_M = M_l->GetEssentialTrueDofs(); + } M_mg->AddOperator(std::move(M_l)); } M = std::move(M_mg); @@ -76,7 +80,6 @@ DivFreeSolver::DivFreeSolver( h1_fespaces.GetFinestFESpace(), false); } Grad = &h1_fespaces.GetFinestFESpace().GetDiscreteInterpolator(); - bdr_tdof_list_M = &h1_bdr_tdof_lists.back(); // The system matrix for the projection is real and SPD. auto amg = std::make_unique>( diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp index 3b8611da9..f76fe40ad 100644 --- a/palace/linalg/rap.cpp +++ b/palace/linalg/rap.cpp @@ -4,6 +4,7 @@ #include "rap.hpp" #include "fem/bilinearform.hpp" +#include "linalg/hypre.hpp" namespace palace { @@ -14,7 +15,7 @@ ParOperator::ParOperator(std::unique_ptr &&dA, const Operator *pA, : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), data_A(std::move(dA)), A((data_A != nullptr) ? 
data_A.get() : pA), trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict), - dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr) + diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr) { MFEM_VERIFY(A, "Cannot construct ParOperator from an empty matrix!"); } @@ -40,25 +41,20 @@ void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, "only DiagonalPolicy::DIAG_ONE or DiagonalPolicy::DIAG_ZERO!"); MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " "for rectangular ParOperator!"); - dbc_tdof_list = &tdof_list; - dbc_tdof_list->Read(true); // Copy to device + tdof_list.Read(); + dbc_tdof_list.MakeRef(tdof_list); diag_policy = policy; } void ParOperator::EliminateRHS(const Vector &x, Vector &b) const { - if (!dbc_tdof_list) - { - return; - } - MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); { auto &tx = trial_fespace.GetTVector(); tx = 0.0; - linalg::SetSubVector(tx, *dbc_tdof_list, x); + linalg::SetSubVector(tx, dbc_tdof_list, x); trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } @@ -70,11 +66,11 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const b.Add(-1.0, ty); if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(b, *dbc_tdof_list, x); + linalg::SetSubVector(b, dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(b, *dbc_tdof_list, 0.0); + linalg::SetSubVector(b, dbc_tdof_list, 0.0); } } @@ -86,59 +82,50 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const } // Build the square or rectangular assembled HypreParMatrix. - const auto *sA = dynamic_cast(A); - std::unique_ptr data_sA; + const auto *sA = dynamic_cast(A); + std::unique_ptr data_sA; if (!sA) { const auto *cA = dynamic_cast(A); - MFEM_VERIFY(cA, "ParOperator::ParallelAssemble requires A as an mfem::SparseMatrix or " - "ceed::Operator!"); + MFEM_VERIFY(cA, + "ParOperator::ParallelAssemble requires A as an hypre::HypreCSRMatrix or " + "ceed::Operator!"); data_sA = BilinearForm::FullAssemble(*cA, skip_zeros, use_R); sA = data_sA.get(); } - if (&trial_fespace == &test_fespace) + + hypre_ParCSRMatrix *hA = hypre_ParCSRMatrixCreate( + trial_fespace.GetComm(), test_fespace.GlobalVSize(), trial_fespace.GlobalVSize(), + test_fespace.Get().GetDofOffsets(), trial_fespace.Get().GetDofOffsets(), 0, sA->NNZ(), + 0); + hypre_CSRMatrix *hA_diag = hypre_ParCSRMatrixDiag(hA); + hypre_ParCSRMatrixDiag(hA) = *const_cast(sA); + hypre_ParCSRMatrixInitialize(hA); + + const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix(); + if (!use_R) { - mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( - trial_fespace.GetComm(), trial_fespace.GlobalVSize(), - trial_fespace.Get().GetDofOffsets(), const_cast(sA)); - const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix(); - RAP = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); - delete hA; + const mfem::HypreParMatrix *Rt = test_fespace.Get().Dof_TrueDof_Matrix(); + RAP = std::make_unique(hypre_ParCSRMatrixRAP(*Rt, hA, *P), true); } else { - mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( - trial_fespace.GetComm(), test_fespace.GlobalVSize(), trial_fespace.GlobalVSize(), - test_fespace.Get().GetDofOffsets(), trial_fespace.Get().GetDofOffsets(), - const_cast(sA)); - const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix(); - if (!use_R) - { - const 
mfem::HypreParMatrix *Rt = test_fespace.Get().Dof_TrueDof_Matrix(); - RAP = - std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), true); - } - else - { - mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace.GetRestrictionMatrix()); - mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( - test_fespace.GetComm(), test_fespace.GlobalVSize(), - test_fespace.GlobalTrueVSize(), test_fespace.Get().GetDofOffsets(), - test_fespace.Get().GetTrueDofOffsets(), sRt); - RAP = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), - true); - delete sRt; - delete hRt; - } - delete hA; + mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace.GetRestrictionMatrix()); + mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( + test_fespace.GetComm(), test_fespace.GlobalVSize(), test_fespace.GlobalTrueVSize(), + test_fespace.Get().GetDofOffsets(), test_fespace.Get().GetTrueDofOffsets(), sRt); + RAP = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, hA, *P), true); + delete sRt; + delete hRt; } + + hypre_ParCSRMatrixDiag(hA) = hA_diag; + hypre_ParCSRMatrixDestroy(hA); hypre_ParCSRMatrixSetNumNonzeros(*RAP); // Eliminate boundary conditions on the assembled (square) matrix. - if (dbc_tdof_list) - { - RAP->EliminateBC(*dbc_tdof_list, diag_policy); - } + RAP->EliminateBC(dbc_tdof_list, diag_policy); + return *RAP; } @@ -156,14 +143,7 @@ void ParOperator::AssembleDiagonal(Vector &diag) const MFEM_VERIFY(&trial_fespace == &test_fespace, "Diagonal assembly is only available for square ParOperator!"); auto &lx = trial_fespace.GetLVector(); - if (const auto *sA = dynamic_cast(A)) - { - sA->GetDiag(lx); - } - else - { - A->AssembleDiagonal(lx); - } + A->AssembleDiagonal(lx); // Parallel assemble and eliminate essential true dofs. const Operator *P = test_fespace.GetProlongationMatrix(); @@ -175,15 +155,17 @@ void ParOperator::AssembleDiagonal(Vector &diag) const { P->MultTranspose(lx, diag); } - if (dbc_tdof_list) + + // Eliminate essential true dofs. 
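The conditional that follows, and its many repetitions in the Mult, MultTranspose, and AddMult variants of ParOperator and ComplexParOperator below, all instantiate one pattern: after applying the operator on interior dofs, constrained true dofs are overwritten so the operator acts as the identity (DIAG_ONE) or as zero (DIAG_ZERO) on them. Factored out, the pattern would read roughly as follows; this is a sketch for clarity, not a helper the patch adds, and the diagonal assembly variant just below sets the constant 1.0 rather than values from x:

// Hypothetical helper (not part of this patch) capturing the diagonal policy
// applied after each operator application in this file.
inline void ApplyDiagonalPolicy(const mfem::Array<int> &dbc_tdof_list,
                                Operator::DiagonalPolicy diag_policy,
                                const Vector &x, Vector &y)
{
  if (!dbc_tdof_list.Size())
  {
    return;  // No constrained essential true dofs
  }
  if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
  {
    linalg::SetSubVector(y, dbc_tdof_list, x);  // y = x on constrained dofs
  }
  else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
  {
    linalg::SetSubVector(y, dbc_tdof_list, 0.0);  // y = 0 on constrained dofs
  }
}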
+ if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(diag, *dbc_tdof_list, 1.0); + linalg::SetSubVector(diag, dbc_tdof_list, 1.0); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(diag, *dbc_tdof_list, 0.0); + linalg::SetSubVector(diag, dbc_tdof_list, 0.0); } } } @@ -200,11 +182,11 @@ void ParOperator::Mult(const Vector &x, Vector &y) const auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &tx = trial_fespace.GetTVector(); tx = x; - linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else @@ -216,15 +198,15 @@ void ParOperator::Mult(const Vector &x, Vector &y) const A->Mult(lx, ly); RestrictionMatrixMult(ly, y); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(y, dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, dbc_tdof_list, 0.0); } } } @@ -241,11 +223,11 @@ void ParOperator::MultTranspose(const Vector &x, Vector &y) const auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &ty = test_fespace.GetTVector(); ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); } else @@ -257,15 +239,15 @@ void ParOperator::MultTranspose(const Vector &x, Vector &y) const A->MultTranspose(ly, lx); trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(y, dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, dbc_tdof_list, 0.0); } } } @@ -282,11 +264,11 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &tx = trial_fespace.GetTVector(); tx = x; - linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else @@ -299,15 +281,15 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const auto &ty = test_fespace.GetTVector(); RestrictionMatrixMult(ly, ty); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(ty, dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); } } y.Add(a, ty); @@ -325,11 +307,11 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &ty = test_fespace.GetTVector(); ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); } else @@ -342,15 +324,15 @@ void ParOperator::AddMultTranspose(const 
Vector &x, Vector &y, const double a) c auto &tx = trial_fespace.GetTVector(); trial_fespace.GetProlongationMatrix()->MultTranspose(lx, tx); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(tx, *dbc_tdof_list, x); + linalg::SetSubVector(tx, dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); } } y.Add(a, tx); @@ -397,8 +379,7 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, ? std::make_unique(std::move(dAr), std::move(dAi)) : std::make_unique(pAr, pAi)), A(data_A.get()), trial_fespace(trial_fespace), test_fespace(test_fespace), - use_R(test_restrict), dbc_tdof_list(nullptr), - diag_policy(Operator::DiagonalPolicy::DIAG_ONE), + use_R(test_restrict), diag_policy(Operator::DiagonalPolicy::DIAG_ONE), RAPr(A->Real() ? std::make_unique(*A->Real(), trial_fespace, test_fespace, use_R) : nullptr), @@ -441,8 +422,8 @@ void ComplexParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, "DiagonalPolicy::DIAG_ONE specified for ComplexParOperator with no real part!"); MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " "for rectangular ComplexParOperator!"); - dbc_tdof_list = &tdof_list; - dbc_tdof_list->Read(true); // Copy to device + tdof_list.Read(); + dbc_tdof_list.MakeRef(tdof_list); diag_policy = policy; if (RAPr) { @@ -472,13 +453,14 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const { MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexParOperator::Mult!"); + auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &tx = trial_fespace.GetTVector(); tx = x; - linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } @@ -492,15 +474,15 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const A->Mult(lx, ly); RestrictionMatrixMult(ly, y); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(y, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, dbc_tdof_list, 0.0); } } } @@ -509,13 +491,14 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) { MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::MultTranspose!"); + auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &ty = test_fespace.GetTVector(); ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); } else @@ -528,15 +511,15 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + 
linalg::SetSubVector(y, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, dbc_tdof_list, 0.0); } } } @@ -546,13 +529,14 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, { MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!"); + auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &ty = test_fespace.GetTVector(); ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); } else @@ -565,15 +549,15 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(y, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, dbc_tdof_list, 0.0); } } } @@ -583,13 +567,14 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, { MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexParOperator::AddMult!"); + auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &tx = trial_fespace.GetTVector(); tx = x; - linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } @@ -604,15 +589,15 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, auto &ty = test_fespace.GetTVector(); RestrictionMatrixMult(ly, ty); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(ty, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); } } y.AXPY(a, ty); @@ -623,13 +608,14 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector { MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &ty = test_fespace.GetTVector(); ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); } else @@ -643,15 +629,15 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector auto &tx = trial_fespace.GetTVector(); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(tx, *dbc_tdof_list, x); + linalg::SetSubVector(tx, dbc_tdof_list, x); } else 
if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); } } y.AXPY(a, tx); @@ -662,13 +648,14 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl { MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); + auto &lx = trial_fespace.GetLVector(); auto &ly = GetTestLVector(); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { auto &ty = test_fespace.GetTVector(); ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); } else @@ -682,15 +669,15 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl auto &tx = trial_fespace.GetTVector(); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); - if (dbc_tdof_list) + if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(tx, *dbc_tdof_list, x); + linalg::SetSubVector(tx, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); } } y.AXPY(a, tx); diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp index 4c9d36174..1be52fa20 100644 --- a/palace/linalg/rap.hpp +++ b/palace/linalg/rap.hpp @@ -32,7 +32,7 @@ class ParOperator : public Operator const bool use_R; // Lists of constrained essential boundary true dofs for elimination. - const mfem::Array *dbc_tdof_list; + mfem::Array dbc_tdof_list; // Diagonal policy for constrained true dofs. DiagonalPolicy diag_policy; @@ -78,7 +78,10 @@ class ParOperator : public Operator // Get the essential boundary condition true dofs associated with the operator. May be // nullptr. - const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } + const mfem::Array *GetEssentialTrueDofs() const + { + return dbc_tdof_list.Size() ? &dbc_tdof_list : nullptr; + } // Eliminate essential true dofs from the RHS vector b, using the essential boundary // condition values in x. @@ -119,7 +122,7 @@ class ComplexParOperator : public ComplexOperator const bool use_R; // Lists of constrained essential boundary true dofs for elimination. - const mfem::Array *dbc_tdof_list; + mfem::Array dbc_tdof_list; // Diagonal policy for constrained true dofs. Operator::DiagonalPolicy diag_policy; @@ -174,7 +177,10 @@ class ComplexParOperator : public ComplexOperator // Get the essential boundary condition true dofs associated with the operator. May be // nullptr. - const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } + const mfem::Array *GetEssentialTrueDofs() const + { + return dbc_tdof_list.Size() ? 
&dbc_tdof_list : nullptr; + } void AssembleDiagonal(ComplexVector &diag) const override; diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index ce49dae17..0fdd0eb15 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -8,6 +8,7 @@ #include "fem/integrator.hpp" #include "fem/mesh.hpp" #include "fem/multigrid.hpp" +#include "linalg/hypre.hpp" #include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -168,9 +169,9 @@ std::unique_ptr CurlCurlOperator::GetStiffnessMatrix() { Mpi::Print(" Level {:d} (p = {:d}): {:d} unknowns", l, nd_fespace_l.GetMaxElementOrder(), nd_fespace_l.GlobalTrueVSize()); - if (const auto *k_spm = dynamic_cast(k_vec[l].get())) + if (const auto *k_spm = dynamic_cast(k_vec[l].get())) { - HYPRE_BigInt nnz = k_spm->NumNonZeroElems(); + HYPRE_BigInt nnz = k_spm->NNZ(); Mpi::GlobalSum(1, &nnz, nd_fespace_l.GetComm()); Mpi::Print(", {:d} NNZ\n", nnz); } diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index 265b30fc4..769b26756 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -7,6 +7,7 @@ #include "fem/integrator.hpp" #include "fem/mesh.hpp" #include "fem/multigrid.hpp" +#include "linalg/hypre.hpp" #include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -187,9 +188,9 @@ std::unique_ptr LaplaceOperator::GetStiffnessMatrix() { Mpi::Print(" Level {:d} (p = {:d}): {:d} unknowns", l, h1_fespace_l.GetMaxElementOrder(), h1_fespace_l.GlobalTrueVSize()); - if (const auto *k_spm = dynamic_cast(k_vec[l].get())) + if (const auto *k_spm = dynamic_cast(k_vec[l].get())) { - HYPRE_BigInt nnz = k_spm->NumNonZeroElems(); + HYPRE_BigInt nnz = k_spm->NNZ(); Mpi::GlobalSum(1, &nnz, h1_fespace_l.GetComm()); Mpi::Print(", {:d} NNZ\n", nnz); } diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 7b94367dd..096e68e55 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -9,6 +9,7 @@ #include "fem/integrator.hpp" #include "fem/mesh.hpp" #include "fem/multigrid.hpp" +#include "linalg/hypre.hpp" #include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -769,14 +770,14 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub { Mpi::Print(" Level {:d}{} (p = {:d}): {:d} unknowns", l, aux ? 
" (auxiliary)" : "", fespace_l.GetMaxElementOrder(), fespace_l.GlobalTrueVSize()); - const auto *b_spm = dynamic_cast(br_l.get()); + const auto *b_spm = dynamic_cast(br_l.get()); if (!b_spm) { - b_spm = dynamic_cast(bi_l.get()); + b_spm = dynamic_cast(bi_l.get()); } if (b_spm) { - HYPRE_BigInt nnz = b_spm->NumNonZeroElems(); + HYPRE_BigInt nnz = b_spm->NNZ(); Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); Mpi::Print(", {:d} NNZ\n", nnz); } diff --git a/test/unit/test-libceed.cpp b/test/unit/test-libceed.cpp index 397852e1e..725b716bc 100644 --- a/test/unit/test-libceed.cpp +++ b/test/unit/test-libceed.cpp @@ -12,6 +12,7 @@ #include "fem/fespace.hpp" #include "fem/integrator.hpp" #include "fem/mesh.hpp" +#include "linalg/hypre.hpp" #include "models/materialoperator.hpp" #include "utils/communication.hpp" @@ -253,8 +254,7 @@ void TestCeedOperatorMult(const Operator &op_test, const Operator &op_ref, } } -void TestCeedOperatorFullAssemble(const mfem::SparseMatrix &mat_test, - const mfem::SparseMatrix &mat_ref) +void TestCeedOperatorFullAssemble(mfem::SparseMatrix &mat_test, mfem::SparseMatrix &mat_ref) { // Ensure host memory is up to date (mfem::Add is missing the device to host copy). mat_test.HostReadI(); @@ -270,20 +270,47 @@ void TestCeedOperatorFullAssemble(const mfem::SparseMatrix &mat_test, REQUIRE(mat_diff->MaxNorm() < 1.0e-12 * std::max(mat_ref.MaxNorm(), 1.0)); } +void TestCeedOperatorFullAssemble(hypre::HypreCSRMatrix &mat_test, + mfem::SparseMatrix &mat_ref) +{ + // Copy test matrix into MFEM's sparse matrix data type. + hypre_CSRMatrixMigrate(mat_test, HYPRE_MEMORY_HOST); + mfem::SparseMatrix mat_test_sp(mat_test.GetI(), mat_test.GetJ(), mat_test.GetData(), + mat_test.Height(), mat_test.Width(), false, false, false); + + // Perform the test. + TestCeedOperatorFullAssemble(mat_test_sp, mat_ref); +} + +void TestCeedOperatorFullAssemble(hypre::HypreCSRMatrix &mat_test, + hypre::HypreCSRMatrix &mat_ref) +{ + // Copy test and reference matrix into MFEM's sparse matrix data type. + hypre_CSRMatrixMigrate(mat_test, HYPRE_MEMORY_HOST); + hypre_CSRMatrixMigrate(mat_ref, HYPRE_MEMORY_HOST); + mfem::SparseMatrix mat_test_sp(mat_test.GetI(), mat_test.GetJ(), mat_test.GetData(), + mat_test.Height(), mat_test.Width(), false, false, false); + mfem::SparseMatrix mat_ref_sp(mat_ref.GetI(), mat_ref.GetJ(), mat_ref.GetData(), + mat_ref.Height(), mat_ref.Width(), false, false, false); + + // Perform the test. + TestCeedOperatorFullAssemble(mat_test_sp, mat_ref_sp); +} + template void TestCeedOperator(T1 &a_test, T2 &a_ref, bool test_transpose, bool skip_zeros) { a_ref.Assemble(skip_zeros); a_ref.Finalize(skip_zeros); - const mfem::SparseMatrix *mat_ref = &a_ref.SpMat(); - const Operator *op_ref = mat_ref; + auto *mat_ref = &a_ref.SpMat(); + auto *op_ref = mat_ref; // Test operator application. - const auto op_test = a_test.PartialAssemble(); + auto op_test = a_test.PartialAssemble(); TestCeedOperatorMult(*op_test, *op_ref, test_transpose); // Test full assembly. - const auto mat_test = a_test.FullAssemble(*op_test, skip_zeros); + auto mat_test = a_test.FullAssemble(*op_test, skip_zeros); TestCeedOperatorFullAssemble(*mat_test, *mat_ref); // Test diagonal assembly if possible. 
@@ -350,11 +377,11 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest, if (!benchmark_no_fa) { constexpr bool bdr_integ = true; - const auto op_test = AssembleTest(fespace, bdr_integ); - const auto op_test_ref = AssembleTestRef(fespace, bdr_integ); - const auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros); - const auto mat_test_ref = BilinearForm::FullAssemble(*op_test_ref, skip_zeros); - nnz = mat_test->NumNonZeroElems(); + auto op_test = AssembleTest(fespace, bdr_integ); + auto op_test_ref = AssembleTestRef(fespace, bdr_integ); + auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros); + auto mat_test_ref = BilinearForm::FullAssemble(*op_test_ref, skip_zeros); + nnz = mat_test->NNZ(); TestCeedOperatorFullAssemble(*mat_test, *mat_test_ref); } @@ -363,11 +390,11 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest, { BENCHMARK("Assemble (MFEM Legacy)") { - const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros); + auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros); return op_ref->Height(); }; { - const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros); + auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros); y_ref = 0.0; BENCHMARK("AddMult (MFEM Legacy)") { @@ -382,11 +409,11 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest, { BENCHMARK("Assemble (MFEM Partial)") { - const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros); + auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros); return op_ref->Height(); }; { - const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros); + auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros); y_ref = 0.0; BENCHMARK("AddMult (MFEM Partial)") { @@ -401,11 +428,11 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest, // Benchmark libCEED assembly. 
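The BENCHMARK blocks in these tests come from Catch2's microbenchmarking support: the body is run repeatedly under a timer, and returning a value from it keeps the optimizer from discarding the measured work. A minimal standalone example, assuming Catch2 v3 with its benchmark headers (the exact include layout in this test suite may differ):

#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_test_macros.hpp>

TEST_CASE("Benchmark example")
{
  BENCHMARK("Sum integers")
  {
    long sum = 0;
    for (int i = 0; i < 1000; i++)
    {
      sum += i;
    }
    return sum;  // Returning the result prevents dead-code elimination
  };
}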
BENCHMARK("Assemble (libCEED)") { - const auto op_test = AssembleTest(fespace); + auto op_test = AssembleTest(fespace); return op_test->Height(); }; { - const auto op_test = AssembleTest(fespace); + auto op_test = AssembleTest(fespace); y_test = 0.0; BENCHMARK("AddMult (libCEED)") { @@ -417,9 +444,9 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest, { BENCHMARK("Full Assemble (libCEED)") { - const auto op_test = AssembleTest(fespace); - const auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros); - return mat_test->NumNonZeroElems(); + auto op_test = AssembleTest(fespace); + auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros); + return mat_test->NNZ(); }; } @@ -473,12 +500,12 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, std::size_t nnz = 0; if (!benchmark_no_fa) { - const auto op_test = AssembleTest(trial_fespace, test_fespace); - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::LEGACY, skip_zeros_interp); - const auto mat_test = DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); - const auto *mat_ref = &op_ref->SpMat(); - nnz = mat_test->NumNonZeroElems(); + auto op_test = AssembleTest(trial_fespace, test_fespace); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::LEGACY, + skip_zeros_interp); + auto mat_test = DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); + auto *mat_ref = &op_ref->SpMat(); + nnz = mat_test->NNZ(); TestCeedOperatorFullAssemble(*mat_test, *mat_ref); } @@ -487,13 +514,13 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, { BENCHMARK("Assemble (MFEM Legacy)") { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::LEGACY, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::LEGACY, + skip_zeros_interp); return op_ref->Height(); }; { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::LEGACY, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::LEGACY, + skip_zeros_interp); y_ref = 0.0; BENCHMARK("AddMult (MFEM Legacy)") { @@ -509,13 +536,13 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, { BENCHMARK("Assemble (MFEM Partial)") { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::PARTIAL, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::PARTIAL, + skip_zeros_interp); return op_ref->Height(); }; { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::PARTIAL, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::PARTIAL, + skip_zeros_interp); y_ref = 0.0; BENCHMARK("AddMult (MFEM Partial)") { @@ -530,11 +557,11 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, // Benchmark libCEED assembly. 
BENCHMARK("Assemble (libCEED)") { - const auto op_test = AssembleTest(trial_fespace, test_fespace); + auto op_test = AssembleTest(trial_fespace, test_fespace); return op_test->Height(); }; { - const auto op_test = AssembleTest(trial_fespace, test_fespace); + auto op_test = AssembleTest(trial_fespace, test_fespace); y_test = 0.0; BENCHMARK("AddMult (libCEED)") { @@ -546,10 +573,9 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, { BENCHMARK("Full Assemble (libCEED)") { - const auto op_test = AssembleTest(trial_fespace, test_fespace); - const auto mat_test = - DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); - return mat_test->NumNonZeroElems(); + auto op_test = AssembleTest(trial_fespace, test_fespace); + auto mat_test = DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); + return mat_test->NNZ(); }; }