From 2eb4ebe8a3191c2a4684c6126b575ca4f98c4e12 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Mon, 25 Mar 2024 11:10:44 -0700 Subject: [PATCH] Refactor libCEED operator construction to happen once rather than at every error estimate calculation --- palace/fem/libceed/basis.cpp | 12 +- palace/fem/libceed/integrator.cpp | 1 + palace/fem/libceed/operator.cpp | 4 +- palace/linalg/errorestimator.cpp | 258 ++++++++++++++++++++---------- palace/linalg/errorestimator.hpp | 17 +- 5 files changed, 190 insertions(+), 102 deletions(-) diff --git a/palace/fem/libceed/basis.cpp b/palace/fem/libceed/basis.cpp index 672f3e97b..28c69e1e7 100644 --- a/palace/fem/libceed/basis.cpp +++ b/palace/fem/libceed/basis.cpp @@ -120,11 +120,10 @@ void InitMfemInterpolatorBasis(const mfem::FiniteElement &trial_fe, MFEM_VERIFY(trial_num_comp == test_num_comp && trial_num_comp == 1, "libCEED discrete linear operator requires same vdim = 1 for trial and test " "FE spaces!"); - const int dim = trial_fe.GetDim(); const int trial_P = trial_fe.GetDof(); const int test_P = test_fe.GetDof(); - mfem::DenseMatrix qX(dim, test_P), Gt(trial_P, test_P * dim), Bt; - mfem::Vector qW(test_P); + mfem::DenseMatrix Bt, Gt(trial_P, test_P); + mfem::Vector qX(test_P), qW(test_P); mfem::IsoparametricTransformation dummy; dummy.SetIdentityTransformation(trial_fe.GetGeomType()); if (trial_fe.GetMapType() == test_fe.GetMapType()) @@ -159,9 +158,10 @@ void InitMfemInterpolatorBasis(const mfem::FiniteElement &trial_fe, qX = 0.0; qW = 0.0; - PalaceCeedCall(ceed, CeedBasisCreateH1(ceed, GetCeedTopology(trial_fe.GetGeomType()), - trial_num_comp, trial_P, test_P, Bt.GetData(), - Gt.GetData(), qX.GetData(), qW.GetData(), basis)); + // Note: ceed::GetCeedTopology(CEED_TOPOLOGY_LINE) == 1. + PalaceCeedCall(ceed, CeedBasisCreateH1(ceed, CEED_TOPOLOGY_LINE, trial_num_comp, trial_P, + test_P, Bt.GetData(), Gt.GetData(), qX.GetData(), + qW.GetData(), basis)); } } // namespace diff --git a/palace/fem/libceed/integrator.cpp b/palace/fem/libceed/integrator.cpp index 438d6688f..8e581f068 100644 --- a/palace/fem/libceed/integrator.cpp +++ b/palace/fem/libceed/integrator.cpp @@ -589,6 +589,7 @@ void AssembleCeedElementErrorIntegrator( PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(input1_basis, &num_qpts)); CeedBasis mesh_elem_basis; { + // Note: ceed::GetCeedTopology(CEED_TOPOLOGY_LINE) == 1. mfem::Vector Bt(num_qpts), Gt(num_qpts), qX(num_qpts), qW(num_qpts); Bt = 1.0; Gt = 0.0; diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp index ecd46d167..8fdc162bd 100644 --- a/palace/fem/libceed/operator.cpp +++ b/palace/fem/libceed/operator.cpp @@ -67,8 +67,8 @@ void Operator::AddOper(CeedOperator sub_op, CeedOperator sub_op_t) PalaceCeedCallBackend(CeedOperatorGetCeed(sub_op, &ceed)); CeedSize l_in, l_out; PalaceCeedCall(ceed, CeedOperatorGetActiveVectorLengths(sub_op, &l_in, &l_out)); - MFEM_VERIFY(mfem::internal::to_int(l_in) == width && - mfem::internal::to_int(l_out) == height, + MFEM_VERIFY((l_in < 0 || mfem::internal::to_int(l_in) == width) && + (l_out < 0 || mfem::internal::to_int(l_out) == height), "Dimensions mismatch for CeedOperator!"); PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(op[id], sub_op)); PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op)); diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp index 39cdf56b5..e53def7de 100644 --- a/palace/linalg/errorestimator.cpp +++ b/palace/linalg/errorestimator.cpp @@ -190,44 +190,18 @@ template CurlFluxErrorEstimator::CurlFluxErrorEstimator( const MaterialOperator &mat_op, FiniteElementSpaceHierarchy &nd_fespaces, double tol, int max_it, int print, bool use_mg) - : mat_op(mat_op), nd_fespace(nd_fespaces.GetFinestFESpace()), - projector(mat_op, nd_fespaces, tol, max_it, print, use_mg), U_gf(nd_fespace.GetVSize()), - F(nd_fespace.GetTrueVSize()), F_gf(nd_fespace.GetVSize()) + : nd_fespace(nd_fespaces.GetFinestFESpace()), + projector(mat_op, nd_fespaces, tol, max_it, print, use_mg), + integ_op(2 * nd_fespace.GetMesh().GetNE(), nd_fespace.GetVSize()), + U_gf(nd_fespace.GetVSize()), F(nd_fespace.GetTrueVSize()), F_gf(nd_fespace.GetVSize()) { U_gf.UseDevice(true); F.UseDevice(true); F_gf.UseDevice(true); -} -template -void CurlFluxErrorEstimator::AddErrorIndicator(const VecType &U, - ErrorIndicator &indicator) const -{ - // Compute the projection of the discontinuous flux onto the smooth finite element space - // and populate the corresponding grid functions. - BlockTimer bt(Timer::ESTIMATION); - projector.Mult(U, F); - if constexpr (std::is_same::value) - { - nd_fespace.GetProlongationMatrix()->Mult(U.Real(), U_gf.Real()); - nd_fespace.GetProlongationMatrix()->Mult(U.Imag(), U_gf.Imag()); - nd_fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real()); - nd_fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag()); - } - else - { - nd_fespace.GetProlongationMatrix()->Mult(U, U_gf); - nd_fespace.GetProlongationMatrix()->Mult(F, F_gf); - } - - // Use libCEED operators to perform the error estimate integration over each element. The - // discontinuous flux is μ⁻¹ ∇ × U. Output is stored as error integral for all elements - // first, then scaling integral. + // Construct the libCEED operator used for integrating the element-wise error. The + // discontinuous flux is μ⁻¹ ∇ × U. const auto &mesh = nd_fespace.GetMesh(); - constexpr CeedInt elem_num_comp = 2; - Vector estimates(elem_num_comp * mesh.GetNE()); - estimates.UseDevice(true); - estimates = 0.0; const std::size_t nt = ceed::internal::GetCeedObjects().size(); PalacePragmaOmp(parallel if (nt > 1)) { @@ -240,10 +214,8 @@ void CurlFluxErrorEstimator::AddErrorIndicator(const VecType &U, continue; } - // Create libCEED vector wrappers for use with libCEED operators. Each thread writes - // to non-overlapping entries of the estimates vector. - CeedVector estimates_vec, U_gf_vec, F_gf_vec; - ceed::InitCeedVector(estimates, ceed, &estimates_vec); + // Create libCEED vector wrappers for use with libCEED operators. + CeedVector U_gf_vec, F_gf_vec; if constexpr (std::is_same::value) { ceed::InitCeedVector(U_gf.Real(), ceed, &U_gf_vec); @@ -256,12 +228,13 @@ void CurlFluxErrorEstimator::AddErrorIndicator(const VecType &U, } // Construct mesh element restriction for elements of this element geometry type. + constexpr CeedInt elem_num_comp = 2; CeedElemRestriction mesh_elem_restr; PalaceCeedCall(ceed, CeedElemRestrictionCreate( ceed, static_cast(data.indices.size()), 1, elem_num_comp, mesh.GetNE(), elem_num_comp * mesh.GetNE(), CEED_MEM_HOST, - CEED_COPY_VALUES, data.indices.data(), &mesh_elem_restr)); + CEED_USE_POINTER, data.indices.data(), &mesh_elem_restr)); // Element restriction and basis objects for inputs. CeedElemRestriction nd_restr = @@ -292,31 +265,105 @@ void CurlFluxErrorEstimator::AddErrorIndicator(const VecType &U, info.trial_ops = ceed::EvalMode::Curl; info.test_ops = ceed::EvalMode::Interp; - CeedOperator op; + CeedOperator sub_op; ceed::AssembleCeedElementErrorIntegrator( info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, U_gf_vec, F_gf_vec, nd_restr, nd_restr, nd_basis, nd_basis, mesh_elem_restr, data.geom_data, - data.geom_data_restr, &op); + data.geom_data_restr, &sub_op); + integ_op.AddOper(sub_op); // Sub-operator owned by ceed::Operator + + // Element restriction and passive input vectors are owned by the operator. + PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr)); + PalaceCeedCall(ceed, CeedVectorDestroy(&U_gf_vec)); + PalaceCeedCall(ceed, CeedVectorDestroy(&F_gf_vec)); + } + } - // Do the integration (both input vectors are passive). For complex case, add sum of - // squares of real and imaginary parts to the estimates before square root. - PalaceCeedCall(ceed, CeedOperatorApplyAdd(op, CEED_VECTOR_NONE, estimates_vec, - CEED_REQUEST_IMMEDIATE)); + // Finalize the operator (call CeedOperatorCheckReady). + integ_op.Finalize(); +} + +template +void CurlFluxErrorEstimator::AddErrorIndicator(const VecType &U, + ErrorIndicator &indicator) const +{ + // Compute the projection of the discontinuous flux onto the smooth finite element space + // and populate the corresponding grid functions. + BlockTimer bt(Timer::ESTIMATION); + projector.Mult(U, F); + if constexpr (std::is_same::value) + { + nd_fespace.GetProlongationMatrix()->Mult(U.Real(), U_gf.Real()); + nd_fespace.GetProlongationMatrix()->Mult(U.Imag(), U_gf.Imag()); + nd_fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real()); + nd_fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag()); + } + else + { + nd_fespace.GetProlongationMatrix()->Mult(U, U_gf); + nd_fespace.GetProlongationMatrix()->Mult(F, F_gf); + } + + // Perform the integration over each element. Output is stored as error integral for all + // elements first, then scaling integral. + const auto &mesh = nd_fespace.GetMesh(); + Vector estimates(2 * mesh.GetNE()); + estimates.UseDevice(true); + estimates = 0.0; + const std::size_t nt = ceed::internal::GetCeedObjects().size(); + PalacePragmaOmp(parallel if (nt > 1)) + { + Ceed ceed = ceed::internal::GetCeedObjects()[utils::GetThreadNum()]; + + // We need to update the state of the underlying libCEED vectors to indicate that the + // data has changed. Each thread has it's own vector, referencing the same underlying + // data. + CeedVector U_gf_vec, F_gf_vec; + { + CeedInt nsub_ops; + CeedOperator *sub_ops; + PalaceCeedCall( + ceed, CeedCompositeOperatorGetNumSub(integ_op[utils::GetThreadNum()], &nsub_ops)); + PalaceCeedCall( + ceed, CeedCompositeOperatorGetSubList(integ_op[utils::GetThreadNum()], &sub_ops)); + MFEM_ASSERT(nsub_ops > 0, "Unexpected empty libCEED composite operator!"); + CeedOperatorField field; + PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "curl_u_1", &field)); + PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &U_gf_vec)); + PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "u_2", &field)); + PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &F_gf_vec)); if constexpr (std::is_same::value) { - ceed::InitCeedVector(U_gf.Imag(), ceed, &U_gf_vec, false); - ceed::InitCeedVector(F_gf.Imag(), ceed, &F_gf_vec, false); - PalaceCeedCall(ceed, CeedOperatorApplyAdd(op, CEED_VECTOR_NONE, estimates_vec, - CEED_REQUEST_IMMEDIATE)); + ceed::InitCeedVector(U_gf.Real(), ceed, &U_gf_vec, false); + ceed::InitCeedVector(F_gf.Real(), ceed, &F_gf_vec, false); + } + else + { + ceed::InitCeedVector(U_gf, ceed, &U_gf_vec, false); + ceed::InitCeedVector(F_gf, ceed, &F_gf_vec, false); } + } - // Cleanup. - PalaceCeedCall(ceed, CeedOperatorDestroy(&op)); - PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr)); - PalaceCeedCall(ceed, CeedVectorDestroy(&estimates_vec)); - PalaceCeedCall(ceed, CeedVectorDestroy(&U_gf_vec)); - PalaceCeedCall(ceed, CeedVectorDestroy(&F_gf_vec)); + // Each thread writes to non-overlapping entries of the estimates vector. + CeedVector estimates_vec; + ceed::InitCeedVector(estimates, ceed, &estimates_vec); + + // Do the integration (both input vectors are passive). For the complex case, add sum of + // squares of real and imaginary parts to the estimates before square root. + PalaceCeedCall(ceed, + CeedOperatorApplyAdd(integ_op[utils::GetThreadNum()], CEED_VECTOR_NONE, + estimates_vec, CEED_REQUEST_IMMEDIATE)); + if constexpr (std::is_same::value) + { + ceed::InitCeedVector(U_gf.Imag(), ceed, &U_gf_vec, false); + ceed::InitCeedVector(F_gf.Imag(), ceed, &F_gf_vec, false); + PalaceCeedCall(ceed, + CeedOperatorApplyAdd(integ_op[utils::GetThreadNum()], CEED_VECTOR_NONE, + estimates_vec, CEED_REQUEST_IMMEDIATE)); } + + // Cleanup. + PalaceCeedCall(ceed, CeedVectorDestroy(&estimates_vec)); } // Finalize the element-wise error estimates. @@ -332,33 +379,18 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpaceHierarchy &rt_fespaces, double tol, int max_it, int print, bool use_mg) - : mat_op(mat_op), h1_fespace(h1_fespace), rt_fespace(rt_fespaces.GetFinestFESpace()), + : h1_fespace(h1_fespace), rt_fespace(rt_fespaces.GetFinestFESpace()), projector(mat_op, h1_fespace, rt_fespaces, tol, max_it, print, use_mg), + integ_op(2 * h1_fespace.GetMesh().GetNE(), h1_fespace.GetVSize()), U_gf(h1_fespace.GetVSize()), F(rt_fespace.GetTrueVSize()), F_gf(rt_fespace.GetVSize()) { U_gf.UseDevice(true); F.UseDevice(true); F_gf.UseDevice(true); -} - -void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U, - ErrorIndicator &indicator) const -{ - // Compute the projection of the discontinuous flux onto the smooth finite element space - // and populate the corresponding grid functions. - BlockTimer bt(Timer::ESTIMATION); - projector.Mult(U, F); - h1_fespace.GetProlongationMatrix()->Mult(U, U_gf); - rt_fespace.GetProlongationMatrix()->Mult(F, F_gf); - // Use libCEED operators to perform the error estimate integration over each element. The - // discontinuous flux is ε ∇U. Output is stored as error integral for all elements first, - // then scaling integral. + // Construct the libCEED operator used for integrating the element-wise error. The + // discontinuous flux is ε ∇U. const auto &mesh = h1_fespace.GetMesh(); - constexpr CeedInt elem_num_comp = 2; - Vector estimates(elem_num_comp * mesh.GetNE()); - estimates.UseDevice(true); - estimates = 0.0; const std::size_t nt = ceed::internal::GetCeedObjects().size(); PalacePragmaOmp(parallel if (nt > 1)) { @@ -371,20 +403,19 @@ void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U, continue; } - // Create libCEED vector wrappers for use with libCEED operators. Each thread writes - // to non-overlapping entries of the estimates vector. - CeedVector estimates_vec, U_gf_vec, F_gf_vec; - ceed::InitCeedVector(estimates, ceed, &estimates_vec); + // Create libCEED vector wrappers for use with libCEED operators. + CeedVector U_gf_vec, F_gf_vec; ceed::InitCeedVector(U_gf, ceed, &U_gf_vec); ceed::InitCeedVector(F_gf, ceed, &F_gf_vec); // Construct mesh element restriction for elements of this element geometry type. + constexpr CeedInt elem_num_comp = 2; CeedElemRestriction mesh_elem_restr; PalaceCeedCall(ceed, CeedElemRestrictionCreate( ceed, static_cast(data.indices.size()), 1, elem_num_comp, mesh.GetNE(), elem_num_comp * mesh.GetNE(), CEED_MEM_HOST, - CEED_COPY_VALUES, data.indices.data(), &mesh_elem_restr)); + CEED_USE_POINTER, data.indices.data(), &mesh_elem_restr)); // Element restriction and basis objects for inputs. CeedElemRestriction h1_restr = @@ -421,25 +452,80 @@ void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U, info.trial_ops = ceed::EvalMode::Grad; info.test_ops = ceed::EvalMode::Interp; - CeedOperator op; + CeedOperator sub_op; ceed::AssembleCeedElementErrorIntegrator( info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, U_gf_vec, F_gf_vec, h1_restr, rt_restr, h1_basis, rt_basis, mesh_elem_restr, data.geom_data, - data.geom_data_restr, &op); - - // Do the integration (both input vectors are passive). - PalaceCeedCall(ceed, CeedOperatorApplyAdd(op, CEED_VECTOR_NONE, estimates_vec, - CEED_REQUEST_IMMEDIATE)); + data.geom_data_restr, &sub_op); + integ_op.AddOper(sub_op); // Sub-operator owned by ceed::Operator - // Cleanup. - PalaceCeedCall(ceed, CeedOperatorDestroy(&op)); + // Element restriction and passive input vectors are owned by the operator. PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr)); - PalaceCeedCall(ceed, CeedVectorDestroy(&estimates_vec)); PalaceCeedCall(ceed, CeedVectorDestroy(&U_gf_vec)); PalaceCeedCall(ceed, CeedVectorDestroy(&F_gf_vec)); } } + // Finalize the operator (call CeedOperatorCheckReady). + integ_op.Finalize(); +} + +void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U, + ErrorIndicator &indicator) const +{ + // Compute the projection of the discontinuous flux onto the smooth finite element space + // and populate the corresponding grid functions. + BlockTimer bt(Timer::ESTIMATION); + projector.Mult(U, F); + h1_fespace.GetProlongationMatrix()->Mult(U, U_gf); + rt_fespace.GetProlongationMatrix()->Mult(F, F_gf); + + // Use libCEED operators to perform the error estimate integration over each element. The + // discontinuous flux is ε ∇U. Output is stored as error integral for all elements first, + // then scaling integral. + const auto &mesh = h1_fespace.GetMesh(); + Vector estimates(2 * mesh.GetNE()); + estimates.UseDevice(true); + estimates = 0.0; + const std::size_t nt = ceed::internal::GetCeedObjects().size(); + PalacePragmaOmp(parallel if (nt > 1)) + { + Ceed ceed = ceed::internal::GetCeedObjects()[utils::GetThreadNum()]; + + // We need to update the state of the underlying libCEED vectors to indicate that the + // data has changed. Each thread has it's own vector, referencing the same underlying + // data. + { + CeedInt nsub_ops; + CeedOperator *sub_ops; + PalaceCeedCall( + ceed, CeedCompositeOperatorGetNumSub(integ_op[utils::GetThreadNum()], &nsub_ops)); + PalaceCeedCall( + ceed, CeedCompositeOperatorGetSubList(integ_op[utils::GetThreadNum()], &sub_ops)); + MFEM_ASSERT(nsub_ops > 0, "Unexpected empty libCEED composite operator!"); + CeedOperatorField field; + CeedVector U_gf_vec, F_gf_vec; + PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "grad_u_1", &field)); + PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &U_gf_vec)); + PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "u_2", &field)); + PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &F_gf_vec)); + ceed::InitCeedVector(U_gf, ceed, &U_gf_vec, false); + ceed::InitCeedVector(F_gf, ceed, &F_gf_vec, false); + } + + // Each thread writes to non-overlapping entries of the estimates vector. + CeedVector estimates_vec; + ceed::InitCeedVector(estimates, ceed, &estimates_vec); + + // Do the integration (both input vectors are passive). + PalaceCeedCall(ceed, + CeedOperatorApplyAdd(integ_op[utils::GetThreadNum()], CEED_VECTOR_NONE, + estimates_vec, CEED_REQUEST_IMMEDIATE)); + + // Cleanup. + PalaceCeedCall(ceed, CeedVectorDestroy(&estimates_vec)); + } + // Finalize the element-wise error estimates. Vector estimates0(estimates, 0, mesh.GetNE()), estimates1(estimates, mesh.GetNE(), mesh.GetNE()); diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp index b22740466..872259810 100644 --- a/palace/linalg/errorestimator.hpp +++ b/palace/linalg/errorestimator.hpp @@ -8,6 +8,7 @@ #include #include "fem/errorindicator.hpp" #include "fem/fespace.hpp" +#include "fem/libceed/operator.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" @@ -57,15 +58,15 @@ class FluxProjector template class CurlFluxErrorEstimator { - // Reference to material property data (not owned). - const MaterialOperator &mat_op; - // Finite element space used to represent U and F. - FiniteElementSpace &nd_fespace; + const FiniteElementSpace &nd_fespace; // Global L2 projection solver. FluxProjector projector; + // Operator which performs the integration of the flux error on each element. + ceed::Operator integ_op; + // Temporary vectors for error estimation. mutable VecType U_gf, F, F_gf; @@ -83,15 +84,15 @@ class CurlFluxErrorEstimator // denotes a smooth reconstruction of ε ∇Uₕ with continuous normal component. class GradFluxErrorEstimator { - // Reference to material property data (not owned). - const MaterialOperator &mat_op; - // Finite element spaces used to represent U and F. - FiniteElementSpace &h1_fespace, &rt_fespace; + const FiniteElementSpace &h1_fespace, &rt_fespace; // Global L2 projection solver. FluxProjector projector; + // Operator which performs the integration of the flux error on each element. + ceed::Operator integ_op; + // Temporary vectors for error estimation. mutable Vector U_gf, F, F_gf;