diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index f41389867..0201199a2 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -72,20 +72,15 @@ void Operator::AssembleDiagonal(Vector &diag) const
 {
   Ceed ceed;
   CeedMemType mem;
-  CeedScalar *data;
   MFEM_VERIFY(diag.Size() == height, "Invalid size for diagonal vector!");
   diag = 0.0;
   PalaceCeedCallBackend(CeedOperatorGetCeed(op[0], &ceed));
   PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
-  if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
+  if (!mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
   {
-    data = diag.ReadWrite();
-  }
-  else
-  {
-    data = diag.HostReadWrite();
     mem = CEED_MEM_HOST;
   }
+  auto *diag_data = diag.ReadWrite(mem == CEED_MEM_DEVICE);

   PalacePragmaOmp(parallel if (op.size() > 1))
   {
@@ -94,7 +89,7 @@ void Operator::AssembleDiagonal(Vector &diag) const
     MFEM_ASSERT(id < op.size() && op[id],
                 "Out of bounds access for thread number " << id << "!");
     PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
-    PalaceCeedCall(ceed, CeedVectorSetArray(v[id], mem, CEED_USE_POINTER, data));
+    PalaceCeedCall(ceed, CeedVectorSetArray(v[id], mem, CEED_USE_POINTER, diag_data));
     PalaceCeedCall(
         ceed, CeedOperatorLinearAssembleAddDiagonal(op[id], v[id], CEED_REQUEST_IMMEDIATE));
     PalaceCeedCall(ceed, CeedVectorTakeArray(v[id], mem, nullptr));
@@ -110,21 +105,14 @@ inline void CeedAddMult(const std::vector<CeedOperator> &op,
 {
   Ceed ceed;
   CeedMemType mem;
-  const CeedScalar *x_data;
-  CeedScalar *y_data;
   PalaceCeedCallBackend(CeedOperatorGetCeed(op[0], &ceed));
   PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
-  if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
-  {
-    x_data = x.Read();
-    y_data = y.ReadWrite();
-  }
-  else
+  if (!mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
   {
-    x_data = x.HostRead();
-    y_data = y.HostReadWrite();
     mem = CEED_MEM_HOST;
   }
+  const auto *x_data = x.Read(mem == CEED_MEM_DEVICE);
+  auto *y_data = y.ReadWrite(mem == CEED_MEM_DEVICE);

   PalacePragmaOmp(parallel if (op.size() > 1))
   {
@@ -346,57 +334,52 @@ std::unique_ptr<hypre::HypreCSRMatrix> OperatorCOOtoCSR(Ceed ceed, CeedInt m, Ce
     Jmap[k + 1] += Jmap[k];
   }

-  // Construct and fill the final CSR matrix. This is correctly a non-OpenMP loop when
-  // executed on CPU (OpenMP is handled in outer scope above).
+  // Construct and fill the final CSR matrix. On GPU, MFEM and Hypre share the same memory
+  // space. On CPU, the inner nested OpenMP loop (if enabled in MFEM) should be ignored.
   auto mat = std::make_unique<hypre::HypreCSRMatrix>(m, n, nnz_new);
-  const bool use_dev = (mat->GetMemoryLocation() == HYPRE_MEMORY_DEVICE);
   {
-    const auto *d_I_old = I.Read(use_dev);
+    const auto *d_I_old = I.Read();
     auto *d_I = mat->GetI();
-    mfem::forall_switch(use_dev, m + 1,
-                        [=] MFEM_HOST_DEVICE(int i) { d_I[i] = d_I_old[i]; });
+    mfem::forall(m + 1, [=] MFEM_HOST_DEVICE(int i) { d_I[i] = d_I_old[i]; });
   }
   {
-    const auto *d_J_old = J.Read(use_dev);
+    const auto *d_J_old = J.Read();
     auto *d_J = mat->GetJ();
-    mfem::forall_switch(use_dev, nnz_new,
-                        [=] MFEM_HOST_DEVICE(int k) { d_J[k] = d_J_old[k]; });
+    mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k) { d_J[k] = d_J_old[k]; });
   }
   {
     auto FillValues = [&](const double *vals_array)
     {
-      const auto *d_perm = perm.Read(use_dev);
-      const auto *d_Jmap = Jmap.Read(use_dev);
+      const auto *d_perm = perm.Read();
+      const auto *d_Jmap = Jmap.Read();
       auto *d_A = mat->GetData();
       if (set)
       {
-        mfem::forall_switch(use_dev, nnz_new,
-                            [=] MFEM_HOST_DEVICE(int k)
-                            { d_A[k] = vals_array[d_perm[d_Jmap[k]]]; });
+        mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k)
+                              { d_A[k] = vals_array[d_perm[d_Jmap[k]]]; });
       }
       else
       {
-        mfem::forall_switch(use_dev, nnz_new,
-                            [=] MFEM_HOST_DEVICE(int k)
-                            {
-                              double sum = 0.0;
-                              for (int p = d_Jmap[k]; p < d_Jmap[k + 1]; p++)
-                              {
-                                sum += vals_array[d_perm[p]];
-                              }
-                              d_A[k] = sum;
-                            });
+        mfem::forall(nnz_new,
+                     [=] MFEM_HOST_DEVICE(int k)
+                     {
+                       double sum = 0.0;
+                       for (int p = d_Jmap[k]; p < d_Jmap[k + 1]; p++)
+                       {
+                         sum += vals_array[d_perm[p]];
+                       }
+                       d_A[k] = sum;
+                     });
       }
     };
     Ceed ceed;
     const CeedScalar *vals_array;
     PalaceCeedCallBackend(CeedVectorGetCeed(vals, &ceed));
     PalaceCeedCall(ceed, CeedVectorGetArrayRead(vals, mem, &vals_array));
-    if (use_dev && mem != CEED_MEM_DEVICE)
+    if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem != CEED_MEM_DEVICE)
     {
       // Copy values to device before filling.
       Vector d_vals(nnz);
-      d_vals.UseDevice(true);
      {
        auto *d_vals_array = d_vals.HostWrite();
        PalacePragmaOmp(parallel for schedule(static))
diff --git a/palace/linalg/amg.cpp b/palace/linalg/amg.cpp
index 5bd9fe735..cdf89ee62 100644
--- a/palace/linalg/amg.cpp
+++ b/palace/linalg/amg.cpp
@@ -16,13 +16,10 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print)
   // Set additional BoomerAMG options.
   int agg_levels = 1;  // Number of aggressive coarsening levels
   double theta = 0.5;  // AMG strength parameter = 0.25 is 2D optimal (0.5-0.8 for 3D)
+  if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK))
   {
-    HYPRE_MemoryLocation loc;
-    HYPRE_GetMemoryLocation(&loc);
-    if (loc == HYPRE_MEMORY_DEVICE)  // Modify options for GPU-supported features
-    {
-      agg_levels = 0;
-    }
+    // Modify options for GPU-supported features.
+    agg_levels = 0;
   }

   SetAggressiveCoarsening(agg_levels);
diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp
index 74b45b611..7c69f30cd 100644
--- a/palace/linalg/ams.cpp
+++ b/palace/linalg/ams.cpp
@@ -161,16 +161,13 @@ void HypreAmsSolver::InitializeSolver()
   int relax_type = 2;  // 2 = l1-SSOR, 4 = trunc. l1-SSOR, 1 = l1-Jacobi, 16 = Chebyshev
   double weight = 1.0;
   double omega = 1.0;
+  if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK))
   {
-    HYPRE_MemoryLocation loc;
-    HYPRE_GetMemoryLocation(&loc);
-    if (loc == HYPRE_MEMORY_DEVICE)  // Modify options for GPU-supported features
-    {
-      coarsen_type = 8;
-      amg_agg_levels = 0;
-      amg_relax_type = 18;
-      relax_type = 1;
-    }
+    // Modify options for GPU-supported features.
+    coarsen_type = 8;
+    amg_agg_levels = 0;
+    amg_relax_type = 18;
+    relax_type = 1;
   }

   HYPRE_AMSSetSmoothingOptions(ams, relax_type, ams_smooth_it, weight, omega);
diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index af158c517..853a26d82 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -64,6 +64,10 @@ DivFreeSolver::DivFreeSolver(
     const auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l);
     auto M_l = BuildLevelParOperator(std::move(m_vec[l]), h1_fespace_l);
     M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE);
+    if (l == h1_fespaces.GetNumLevels() - 1)
+    {
+      bdr_tdof_list_M = M_l->GetEssentialTrueDofs();
+    }
     M_mg->AddOperator(std::move(M_l));
   }
   M = std::move(M_mg);
@@ -76,7 +80,6 @@ DivFreeSolver::DivFreeSolver(
         h1_fespaces.GetFinestFESpace(), false);
   }
   Grad = &h1_fespaces.GetFinestFESpace().GetDiscreteInterpolator();
-  bdr_tdof_list_M = &h1_bdr_tdof_lists.back();

   // The system matrix for the projection is real and SPD.
   auto amg = std::make_unique<WrapperSolver<Operator>>(
diff --git a/palace/linalg/hypre.cpp b/palace/linalg/hypre.cpp
index 46b800d8f..7fd4d175b 100644
--- a/palace/linalg/hypre.cpp
+++ b/palace/linalg/hypre.cpp
@@ -13,23 +13,20 @@ void HypreVector::Update(const Vector &x)
   {
     vec = hypre_SeqVectorCreate(N);
     hypre_SeqVectorSetDataOwner(vec, 0);
-    hypre_VectorData(vec) = const_cast<double *>(
-        x.Read(hypre_VectorMemoryLocation(vec) == HYPRE_MEMORY_DEVICE));
+    hypre_VectorData(vec) = const_cast<double *>(x.Read());
     hypre_SeqVectorInitialize(vec);
   }
   else
   {
     hypre_SeqVectorSetSize(vec, N);
-    hypre_VectorData(vec) = const_cast<double *>(
-        x.Read(hypre_VectorMemoryLocation(vec) == HYPRE_MEMORY_DEVICE));
+    hypre_VectorData(vec) = const_cast<double *>(x.Read());
   }
 }

 void HypreCSRMatrix::AssembleDiagonal(Vector &diag) const
 {
   diag.SetSize(height);
-  hypre_CSRMatrixExtractDiagonal(
-      mat, diag.Write(hypre_CSRMatrixMemoryLocation(mat) == HYPRE_MEMORY_DEVICE), 0);
+  hypre_CSRMatrixExtractDiagonal(mat, diag.Write(), 0);
 }

 namespace
diff --git a/palace/linalg/hypre.hpp b/palace/linalg/hypre.hpp
index 81e217305..4fe947350 100644
--- a/palace/linalg/hypre.hpp
+++ b/palace/linalg/hypre.hpp
@@ -11,6 +11,15 @@ namespace palace::hypre
 {

+// Helper function to initialize HYPRE and control use of GPU at runtime. This will call
+// HYPRE_SetMemoryLocation and HYPRE_SetExecutionPolicy to match the mfem::Device
+// configuration.
+inline void Initialize()
+{
+  mfem::Hypre::Init();
+  // HYPRE_SetSpGemmUseCusparse(1);  // MFEM sets to zero, so leave as is for now
+}
+
 //
 // Wrapper class for HYPRE's hypre_Vector, which can alias an mfem::Vector object for use
 // with HYPRE.
 //
@@ -25,7 +34,6 @@ class HypreVector
   HypreVector(const Vector &x) : vec(nullptr) { Update(x); }
   ~HypreVector() { hypre_SeqVectorDestroy(vec); }

-  auto GetMemoryLocation() const { return hypre_VectorMemoryLocation(vec); }
   auto Size() const { return hypre_VectorSize(vec); }

   void Update(const Vector &x);
@@ -55,7 +63,6 @@ class HypreCSRMatrix : public palace::Operator
   }
   ~HypreCSRMatrix() { hypre_CSRMatrixDestroy(mat); }

-  auto GetMemoryLocation() const { return hypre_CSRMatrixMemoryLocation(mat); }
   auto NNZ() const { return hypre_CSRMatrixNumNonzeros(mat); }

   const auto *GetI() const { return hypre_CSRMatrixI(mat); }
diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp
index c556a3bda..f76fe40ad 100644
--- a/palace/linalg/rap.cpp
+++ b/palace/linalg/rap.cpp
@@ -15,7 +15,7 @@ ParOperator::ParOperator(std::unique_ptr<Operator> &&dA, const Operator *pA,
   : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()),
     data_A(std::move(dA)), A((data_A != nullptr) ? data_A.get() : pA),
     trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict),
-    dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr)
+    diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr)
 {
   MFEM_VERIFY(A, "Cannot construct ParOperator from an empty matrix!");
 }
@@ -41,25 +41,20 @@ void ParOperator::SetEssentialTrueDofs(const mfem::Array<int> &tdof_list,
               "only DiagonalPolicy::DIAG_ONE or DiagonalPolicy::DIAG_ZERO!");
   MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces "
                                "for rectangular ParOperator!");
-  dbc_tdof_list = &tdof_list;
-  dbc_tdof_list->Read(true);  // Copy to device
+  tdof_list.Read();
+  dbc_tdof_list.MakeRef(tdof_list);
   diag_policy = policy;
 }

 void ParOperator::EliminateRHS(const Vector &x, Vector &b) const
 {
-  if (!dbc_tdof_list)
-  {
-    return;
-  }
-
   MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!");
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

   {
     auto &tx = trial_fespace.GetTVector();
     tx = 0.0;
-    linalg::SetSubVector(tx, *dbc_tdof_list, x);
+    linalg::SetSubVector(tx, dbc_tdof_list, x);
     trial_fespace.GetProlongationMatrix()->Mult(tx, lx);
   }
@@ -71,11 +66,11 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const
   b.Add(-1.0, ty);

   if (diag_policy == DiagonalPolicy::DIAG_ONE)
   {
-    linalg::SetSubVector(b, *dbc_tdof_list, x);
+    linalg::SetSubVector(b, dbc_tdof_list, x);
   }
   else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
   {
-    linalg::SetSubVector(b, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(b, dbc_tdof_list, 0.0);
   }
 }
@@ -129,10 +124,8 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const
   hypre_ParCSRMatrixSetNumNonzeros(*RAP);

   // Eliminate boundary conditions on the assembled (square) matrix.
-  if (dbc_tdof_list)
-  {
-    RAP->EliminateBC(*dbc_tdof_list, diag_policy);
-  }
+  RAP->EliminateBC(dbc_tdof_list, diag_policy);
+
   return *RAP;
 }
@@ -162,15 +155,17 @@ void ParOperator::AssembleDiagonal(Vector &diag) const
   {
     P->MultTranspose(lx, diag);
   }
-  if (dbc_tdof_list)
+
+  // Eliminate essential true dofs.
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(diag, *dbc_tdof_list, 1.0);
+      linalg::SetSubVector(diag, dbc_tdof_list, 1.0);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(diag, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(diag, dbc_tdof_list, 0.0);
     }
   }
 }
@@ -187,11 +182,11 @@ void ParOperator::Mult(const Vector &x, Vector &y) const
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &tx = trial_fespace.GetTVector();
     tx = x;
-    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(tx, dbc_tdof_list, 0.0);
     trial_fespace.GetProlongationMatrix()->Mult(tx, lx);
   }
   else
@@ -203,15 +198,15 @@ void ParOperator::Mult(const Vector &x, Vector &y) const
   A->Mult(lx, ly);
   RestrictionMatrixMult(ly, y);

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, x);
+      linalg::SetSubVector(y, dbc_tdof_list, x);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(y, dbc_tdof_list, 0.0);
     }
   }
 }
@@ -228,11 +223,11 @@ void ParOperator::MultTranspose(const Vector &x, Vector &y) const
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &ty = test_fespace.GetTVector();
     ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
   }
   else
@@ -244,15 +239,15 @@ void ParOperator::MultTranspose(const Vector &x, Vector &y) const
   A->MultTranspose(ly, lx);
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y);

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, x);
+      linalg::SetSubVector(y, dbc_tdof_list, x);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(y, dbc_tdof_list, 0.0);
     }
   }
 }
@@ -269,11 +264,11 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &tx = trial_fespace.GetTVector();
     tx = x;
-    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(tx, dbc_tdof_list, 0.0);
     trial_fespace.GetProlongationMatrix()->Mult(tx, lx);
   }
   else
@@ -286,15 +281,15 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const
   auto &ty = test_fespace.GetTVector();
   RestrictionMatrixMult(ly, ty);

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, x);
+      linalg::SetSubVector(ty, dbc_tdof_list, x);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     }
   }
   y.Add(a, ty);
@@ -312,11 +307,11 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &ty = test_fespace.GetTVector();
     ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
   }
   else
@@ -329,15 +324,15 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c
   auto &tx = trial_fespace.GetTVector();
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx, tx);

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(tx, *dbc_tdof_list, x);
+      linalg::SetSubVector(tx, dbc_tdof_list, x);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(tx, dbc_tdof_list, 0.0);
     }
   }
   y.Add(a, tx);
@@ -384,8 +379,7 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr<Operator> &&dAr,
         ? std::make_unique<ComplexWrapperOperator>(std::move(dAr), std::move(dAi))
         : std::make_unique<ComplexWrapperOperator>(pAr, pAi)),
     A(data_A.get()), trial_fespace(trial_fespace), test_fespace(test_fespace),
-    use_R(test_restrict), dbc_tdof_list(nullptr),
-    diag_policy(Operator::DiagonalPolicy::DIAG_ONE),
+    use_R(test_restrict), diag_policy(Operator::DiagonalPolicy::DIAG_ONE),
     RAPr(A->Real() ? std::make_unique<ParOperator>(*A->Real(), trial_fespace,
                                                    test_fespace, use_R)
                    : nullptr),
@@ -428,8 +422,8 @@ void ComplexParOperator::SetEssentialTrueDofs(const mfem::Array<int> &tdof_list,
       "DiagonalPolicy::DIAG_ONE specified for ComplexParOperator with no real part!");
   MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces "
                                "for rectangular ComplexParOperator!");
-  dbc_tdof_list = &tdof_list;
-  dbc_tdof_list->Read(true);  // Copy to device
+  tdof_list.Read();
+  dbc_tdof_list.MakeRef(tdof_list);
   diag_policy = policy;
   if (RAPr)
   {
@@ -462,11 +456,11 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &tx = trial_fespace.GetTVector();
     tx = x;
-    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(tx, dbc_tdof_list, 0.0);
     trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real());
     trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag());
   }
@@ -480,15 +474,15 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const
   A->Mult(lx, ly);
   RestrictionMatrixMult(ly, y);

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, x);
+      linalg::SetSubVector(y, dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(y, dbc_tdof_list, 0.0);
     }
   }
 }
@@ -500,11 +494,11 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y)
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &ty = test_fespace.GetTVector();
     ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
   }
   else
@@ -517,15 +511,15 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y)
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real());
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag());

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, x);
+      linalg::SetSubVector(y, dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(y, dbc_tdof_list, 0.0);
     }
   }
 }
@@ -538,11 +532,11 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x,
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &ty = test_fespace.GetTVector();
     ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
   }
   else
@@ -555,15 +549,15 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x,
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real());
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag());

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, x);
+      linalg::SetSubVector(y, dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(y, dbc_tdof_list, 0.0);
     }
   }
 }
@@ -576,11 +570,11 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y,
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &tx = trial_fespace.GetTVector();
     tx = x;
-    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(tx, dbc_tdof_list, 0.0);
     trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real());
     trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag());
   }
@@ -595,15 +589,15 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y,
   auto &ty = test_fespace.GetTVector();
   RestrictionMatrixMult(ly, ty);

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, x);
+      linalg::SetSubVector(ty, dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     }
   }
   y.AXPY(a, ty);
@@ -617,11 +611,11 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &ty = test_fespace.GetTVector();
     ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
   }
   else
@@ -635,15 +629,15 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector
   auto &tx = trial_fespace.GetTVector();
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real());
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag());
-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(tx, *dbc_tdof_list, x);
+      linalg::SetSubVector(tx, dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(tx, dbc_tdof_list, 0.0);
     }
   }
   y.AXPY(a, tx);
@@ -657,11 +651,11 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl
   auto &lx = trial_fespace.GetLVector();
   auto &ly = GetTestLVector();

-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     auto &ty = test_fespace.GetTVector();
     ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+    linalg::SetSubVector(ty, dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
   }
   else
@@ -675,15 +669,15 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl
   auto &tx = trial_fespace.GetTVector();
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real());
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag());
-  if (dbc_tdof_list)
+  if (dbc_tdof_list.Size())
   {
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(tx, *dbc_tdof_list, x);
+      linalg::SetSubVector(tx, dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(tx, dbc_tdof_list, 0.0);
     }
   }
   y.AXPY(a, tx);
diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp
index 4c9d36174..1be52fa20 100644
--- a/palace/linalg/rap.hpp
+++ b/palace/linalg/rap.hpp
@@ -32,7 +32,7 @@ class ParOperator : public Operator
   const bool use_R;

   // Lists of constrained essential boundary true dofs for elimination.
-  const mfem::Array<int> *dbc_tdof_list;
+  mfem::Array<int> dbc_tdof_list;

   // Diagonal policy for constrained true dofs.
   DiagonalPolicy diag_policy;
@@ -78,7 +78,10 @@ class ParOperator : public Operator

   // Get the essential boundary condition true dofs associated with the operator. May be
   // nullptr.
-  const mfem::Array<int> *GetEssentialTrueDofs() const { return dbc_tdof_list; }
+  const mfem::Array<int> *GetEssentialTrueDofs() const
+  {
+    return dbc_tdof_list.Size() ? &dbc_tdof_list : nullptr;
+  }

   // Eliminate essential true dofs from the RHS vector b, using the essential boundary
   // condition values in x.
@@ -119,7 +122,7 @@ class ComplexParOperator : public ComplexOperator
   const bool use_R;

   // Lists of constrained essential boundary true dofs for elimination.
-  const mfem::Array<int> *dbc_tdof_list;
+  mfem::Array<int> dbc_tdof_list;

   // Diagonal policy for constrained true dofs.
   Operator::DiagonalPolicy diag_policy;
@@ -174,7 +177,10 @@ class ComplexParOperator : public ComplexOperator

   // Get the essential boundary condition true dofs associated with the operator. May be
   // nullptr.
-  const mfem::Array<int> *GetEssentialTrueDofs() const { return dbc_tdof_list; }
+  const mfem::Array<int> *GetEssentialTrueDofs() const
+  {
+    return dbc_tdof_list.Size() ? &dbc_tdof_list : nullptr;
+  }

   void AssembleDiagonal(ComplexVector &diag) const override;
diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp
index 3241d438f..47413efef 100644
--- a/palace/linalg/strumpack.cpp
+++ b/palace/linalg/strumpack.cpp
@@ -127,12 +127,7 @@ void StrumpackSolverBase<StrumpackSolverType>::SetOperator(const Operator &op)
               "StrumpackSolver requires a square HypreParMatrix operator!");
   auto *parcsr = (hypre_ParCSRMatrix *)const_cast<mfem::HypreParMatrix &>(*hA);
   hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr);
-#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
-  if (hypre_GetActualMemLocation(hypre_CSRMatrixMemoryLocation(csr)) != hypre_MEMORY_HOST)
-  {
-    hypre_CSRMatrixMigrate(csr, HYPRE_MEMORY_HOST);
-  }
-#endif
+  hypre_CSRMatrixMigrate(csr, HYPRE_MEMORY_HOST);

   // Create the STRUMPACKRowLocMatrix by taking the internal data from a hypre_CSRMatrix.
   HYPRE_BigInt glob_n = hypre_ParCSRMatrixGlobalNumRows(parcsr);
diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp
index 551974c79..749e0bd9f 100644
--- a/palace/linalg/superlu.cpp
+++ b/palace/linalg/superlu.cpp
@@ -95,12 +95,7 @@ void SuperLUSolver::SetOperator(const Operator &op)
               "SuperLUSolver requires a square HypreParMatrix operator!");
   auto *parcsr = (hypre_ParCSRMatrix *)const_cast<mfem::HypreParMatrix &>(*hA);
   hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr);
-#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
-  if (hypre_GetActualMemLocation(hypre_CSRMatrixMemoryLocation(csr)) != hypre_MEMORY_HOST)
-  {
-    hypre_CSRMatrixMigrate(csr, HYPRE_MEMORY_HOST);
-  }
-#endif
+  hypre_CSRMatrixMigrate(csr, HYPRE_MEMORY_HOST);

   // Create the SuperLURowLocMatrix by taking the internal data from a hypre_CSRMatrix.
   HYPRE_BigInt glob_n = hypre_ParCSRMatrixGlobalNumRows(parcsr);
diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp
index 3b33497eb..3d31981d3 100644
--- a/palace/linalg/vector.cpp
+++ b/palace/linalg/vector.cpp
@@ -69,9 +69,8 @@ void ComplexVector::Set(const ComplexVector &y)
 {
   MFEM_ASSERT(y.Size() == Size(),
               "Mismatch in dimension of provided parts in ComplexVector!");
-  data = y.Data();
-  xr.MakeRef(data, 0, Size());
-  xi.MakeRef(data, Size(), Size());
+  Real() = y.Real();
+  Imag() = y.Imag();
 }

 void ComplexVector::Set(const Vector &yr, const Vector &yi)
@@ -581,7 +580,7 @@ std::complex<double> LocalDot(const ComplexVector &x, const ComplexVector &y)
 {
   if (&x == &y)
   {
-    return {LocalDot(x.Data(), y.Data()), 0.0};
+    return {LocalDot(x.Real(), y.Real()) + LocalDot(x.Imag(), y.Imag()), 0.0};
   }
   else
   {
diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp
index 463b4ad68..d6bb3cb6c 100644
--- a/palace/linalg/vector.hpp
+++ b/palace/linalg/vector.hpp
@@ -62,14 +62,6 @@ class ComplexVector
   const Vector &Imag() const { return xi; }
   Vector &Imag() { return xi; }

-  // Access the underlying (real-valued) vector data.
-  const Vector &Data() const
-  {
-    xr.SyncAliasMemory(data);
-    xi.SyncAliasMemory(data);
-    return data;
-  }
-
   // Set from a ComplexVector, without resizing.
   void Set(const ComplexVector &y);
   ComplexVector &operator=(const ComplexVector &y)
diff --git a/palace/main.cpp b/palace/main.cpp
index 7c46125b0..ac3cfebf2 100644
--- a/palace/main.cpp
+++ b/palace/main.cpp
@@ -15,6 +15,7 @@
 #include "fem/errorindicator.hpp"
 #include "fem/libceed/ceed.hpp"
 #include "fem/mesh.hpp"
+#include "linalg/hypre.hpp"
 #include "linalg/slepc.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
@@ -267,7 +268,7 @@ int main(int argc, char *argv[])
 #endif

   // Initialize Hypre and, optionally, SLEPc/PETSc.
-  mfem::Hypre::Init();
+  hypre::Initialize();
 #if defined(PALACE_WITH_SLEPC)
   slepc::Initialize(argc, argv, nullptr, nullptr);
   if (PETSC_COMM_WORLD != world_comm)
diff --git a/test/unit/test-libceed.cpp b/test/unit/test-libceed.cpp
index f6a64b6eb..725b716bc 100644
--- a/test/unit/test-libceed.cpp
+++ b/test/unit/test-libceed.cpp
@@ -254,8 +254,7 @@ void TestCeedOperatorMult(const Operator &op_test, const Operator &op_ref,
   }
 }

-void TestCeedOperatorFullAssemble(const mfem::SparseMatrix &mat_test,
-                                  const mfem::SparseMatrix &mat_ref)
+void TestCeedOperatorFullAssemble(mfem::SparseMatrix &mat_test, mfem::SparseMatrix &mat_ref)
 {
   // Ensure host memory is up to date (mfem::Add is missing the device to host copy).
   mat_test.HostReadI();
@@ -271,14 +270,11 @@ void TestCeedOperatorFullAssemble(const mfem::SparseMatrix &mat_test,
   REQUIRE(mat_diff->MaxNorm() < 1.0e-12 * std::max(mat_ref.MaxNorm(), 1.0));
 }

-void TestCeedOperatorFullAssemble(const hypre::HypreCSRMatrix &mat_test,
-                                  const mfem::SparseMatrix &mat_ref)
+void TestCeedOperatorFullAssemble(hypre::HypreCSRMatrix &mat_test,
+                                  mfem::SparseMatrix &mat_ref)
 {
   // Copy test matrix into MFEM's sparse matrix data type.
-  if (mat_test.GetMemoryLocation() != HYPRE_MEMORY_HOST)
-  {
-    hypre_CSRMatrixMigrate(mat_test, HYPRE_MEMORY_HOST);
-  }
+  hypre_CSRMatrixMigrate(mat_test, HYPRE_MEMORY_HOST);

   mfem::SparseMatrix mat_test_sp(mat_test.GetI(), mat_test.GetJ(), mat_test.GetData(),
                                  mat_test.Height(), mat_test.Width(), false, false, false);
@@ -286,18 +282,12 @@ void TestCeedOperatorFullAssemble(const hypre::HypreCSRMatrix &mat_test,
   TestCeedOperatorFullAssemble(mat_test_sp, mat_ref);
 }

-void TestCeedOperatorFullAssemble(const hypre::HypreCSRMatrix &mat_test,
-                                  const hypre::HypreCSRMatrix &mat_ref)
+void TestCeedOperatorFullAssemble(hypre::HypreCSRMatrix &mat_test,
+                                  hypre::HypreCSRMatrix &mat_ref)
 {
   // Copy test and reference matrix into MFEM's sparse matrix data type.
-  if (mat_test.GetMemoryLocation() != HYPRE_MEMORY_HOST)
-  {
-    hypre_CSRMatrixMigrate(mat_test, HYPRE_MEMORY_HOST);
-  }
-  if (mat_ref.GetMemoryLocation() != HYPRE_MEMORY_HOST)
-  {
-    hypre_CSRMatrixMigrate(mat_ref, HYPRE_MEMORY_HOST);
-  }
+  hypre_CSRMatrixMigrate(mat_test, HYPRE_MEMORY_HOST);
+  hypre_CSRMatrixMigrate(mat_ref, HYPRE_MEMORY_HOST);
   mfem::SparseMatrix mat_test_sp(mat_test.GetI(), mat_test.GetJ(), mat_test.GetData(),
                                  mat_test.Height(), mat_test.Width(), false, false, false);
   mfem::SparseMatrix mat_ref_sp(mat_ref.GetI(), mat_ref.GetJ(), mat_ref.GetData(),
@@ -312,15 +302,15 @@ void TestCeedOperator(T1 &a_test, T2 &a_ref, bool test_transpose, bool skip_zero
 {
   a_ref.Assemble(skip_zeros);
   a_ref.Finalize(skip_zeros);
-  const mfem::SparseMatrix *mat_ref = &a_ref.SpMat();
-  const Operator *op_ref = mat_ref;
+  auto *mat_ref = &a_ref.SpMat();
+  auto *op_ref = mat_ref;

   // Test operator application.
-  const auto op_test = a_test.PartialAssemble();
+  auto op_test = a_test.PartialAssemble();
   TestCeedOperatorMult(*op_test, *op_ref, test_transpose);

   // Test full assembly.
-  const auto mat_test = a_test.FullAssemble(*op_test, skip_zeros);
+  auto mat_test = a_test.FullAssemble(*op_test, skip_zeros);
   TestCeedOperatorFullAssemble(*mat_test, *mat_ref);

   // Test diagonal assembly if possible.
@@ -387,10 +377,10 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
   if (!benchmark_no_fa)
   {
     constexpr bool bdr_integ = true;
-    const auto op_test = AssembleTest(fespace, bdr_integ);
-    const auto op_test_ref = AssembleTestRef(fespace, bdr_integ);
-    const auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros);
-    const auto mat_test_ref = BilinearForm::FullAssemble(*op_test_ref, skip_zeros);
+    auto op_test = AssembleTest(fespace, bdr_integ);
+    auto op_test_ref = AssembleTestRef(fespace, bdr_integ);
+    auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros);
+    auto mat_test_ref = BilinearForm::FullAssemble(*op_test_ref, skip_zeros);
     nnz = mat_test->NNZ();
     TestCeedOperatorFullAssemble(*mat_test, *mat_test_ref);
   }
@@ -400,11 +390,11 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
   {
     BENCHMARK("Assemble (MFEM Legacy)")
     {
-      const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros);
+      auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros);
       return op_ref->Height();
     };
     {
-      const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros);
+      auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::LEGACY, skip_zeros);
       y_ref = 0.0;
       BENCHMARK("AddMult (MFEM Legacy)")
       {
@@ -419,11 +409,11 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
   {
     BENCHMARK("Assemble (MFEM Partial)")
     {
-      const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros);
+      auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros);
       return op_ref->Height();
     };
     {
-      const auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros);
+      auto op_ref = AssembleRef(fespace, mfem::AssemblyLevel::PARTIAL, skip_zeros);
       y_ref = 0.0;
       BENCHMARK("AddMult (MFEM Partial)")
       {
@@ -438,11 +428,11 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
   // Benchmark libCEED assembly.
BENCHMARK("Assemble (libCEED)") { - const auto op_test = AssembleTest(fespace); + auto op_test = AssembleTest(fespace); return op_test->Height(); }; { - const auto op_test = AssembleTest(fespace); + auto op_test = AssembleTest(fespace); y_test = 0.0; BENCHMARK("AddMult (libCEED)") { @@ -454,8 +444,8 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest, { BENCHMARK("Full Assemble (libCEED)") { - const auto op_test = AssembleTest(fespace); - const auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros); + auto op_test = AssembleTest(fespace); + auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros); return mat_test->NNZ(); }; } @@ -510,11 +500,11 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, std::size_t nnz = 0; if (!benchmark_no_fa) { - const auto op_test = AssembleTest(trial_fespace, test_fespace); - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::LEGACY, skip_zeros_interp); - const auto mat_test = DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); - const auto *mat_ref = &op_ref->SpMat(); + auto op_test = AssembleTest(trial_fespace, test_fespace); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::LEGACY, + skip_zeros_interp); + auto mat_test = DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); + auto *mat_ref = &op_ref->SpMat(); nnz = mat_test->NNZ(); TestCeedOperatorFullAssemble(*mat_test, *mat_ref); } @@ -524,13 +514,13 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, { BENCHMARK("Assemble (MFEM Legacy)") { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::LEGACY, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::LEGACY, + skip_zeros_interp); return op_ref->Height(); }; { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::LEGACY, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::LEGACY, + skip_zeros_interp); y_ref = 0.0; BENCHMARK("AddMult (MFEM Legacy)") { @@ -546,13 +536,13 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, { BENCHMARK("Assemble (MFEM Partial)") { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::PARTIAL, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::PARTIAL, + skip_zeros_interp); return op_ref->Height(); }; { - const auto op_ref = AssembleRef(trial_fespace, test_fespace, - mfem::AssemblyLevel::PARTIAL, skip_zeros_interp); + auto op_ref = AssembleRef(trial_fespace, test_fespace, mfem::AssemblyLevel::PARTIAL, + skip_zeros_interp); y_ref = 0.0; BENCHMARK("AddMult (MFEM Partial)") { @@ -567,11 +557,11 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, // Benchmark libCEED assembly. 
BENCHMARK("Assemble (libCEED)") { - const auto op_test = AssembleTest(trial_fespace, test_fespace); + auto op_test = AssembleTest(trial_fespace, test_fespace); return op_test->Height(); }; { - const auto op_test = AssembleTest(trial_fespace, test_fespace); + auto op_test = AssembleTest(trial_fespace, test_fespace); y_test = 0.0; BENCHMARK("AddMult (libCEED)") { @@ -583,9 +573,8 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace, { BENCHMARK("Full Assemble (libCEED)") { - const auto op_test = AssembleTest(trial_fespace, test_fespace); - const auto mat_test = - DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); + auto op_test = AssembleTest(trial_fespace, test_fespace); + auto mat_test = DiscreteLinearOperator::FullAssemble(*op_test, skip_zeros_interp); return mat_test->NNZ(); }; }