Skip to content

Commit

Permalink
Update OpenMP parallelism and add HypreCSRMatrix wrapper class to rep…
Browse files Browse the repository at this point in the history
…lace mfem::SparseMatrix

This allows OpenMP and better GPU acceleration for ceed::Operator full assembly into a CSR matrix using Hypre's functionality.
  • Loading branch information
sebastiangrimberg committed Feb 8, 2024
1 parent 0322fb6 commit e40c7c7
Show file tree
Hide file tree
Showing 20 changed files with 728 additions and 457 deletions.
18 changes: 8 additions & 10 deletions palace/fem/bilinearform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
// This should work fine if some threads create an empty operator (no elements or boundary
// elements).
const std::size_t nt = ceed::internal::GetCeedObjects().size();
PalacePragmaOmp(parallel for schedule(static) if(nt > 1))
for (std::size_t i = 0; i < nt; i++)
PalacePragmaOmp(parallel if (nt > 1))
{
Ceed ceed = ceed::internal::GetCeedObjects()[i];
Ceed ceed = ceed::internal::GetCeedObjects()[utils::GetThreadNum()];

// Initialize the composite operator on each thread.
CeedOperator loc_op;
Expand Down Expand Up @@ -108,14 +107,14 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
}
}
PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op));
op->AddOper(loc_op); // Thread-safe
op->AddOper(loc_op);
}

return op;
}

std::unique_ptr<mfem::SparseMatrix> BilinearForm::FullAssemble(const ceed::Operator &op,
bool skip_zeros, bool set)
std::unique_ptr<hypre::HypreCSRMatrix> BilinearForm::FullAssemble(const ceed::Operator &op,
bool skip_zeros, bool set)
{
return ceed::CeedOperatorFullAssemble(op, skip_zeros, set);
}
Expand Down Expand Up @@ -220,10 +219,9 @@ std::unique_ptr<ceed::Operator> DiscreteLinearOperator::PartialAssemble() const
// This should work fine if some threads create an empty operator (no elements or bounday
// elements).
const std::size_t nt = ceed::internal::GetCeedObjects().size();
PalacePragmaOmp(parallel for schedule(static) if(nt > 1))
for (std::size_t i = 0; i < nt; i++)
PalacePragmaOmp(parallel if (nt > 1))
{
Ceed ceed = ceed::internal::GetCeedObjects()[i];
Ceed ceed = ceed::internal::GetCeedObjects()[utils::GetThreadNum()];

// Initialize the composite operators for each thread.
CeedOperator loc_op, loc_op_t;
Expand Down Expand Up @@ -267,7 +265,7 @@ std::unique_ptr<ceed::Operator> DiscreteLinearOperator::PartialAssemble() const
}
PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op));
PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op_t));
op->AddOper(loc_op, loc_op_t); // Thread-safe
op->AddOper(loc_op, loc_op_t);
}

// Construct dof multiplicity vector for scaling to account for dofs shared between
Expand Down
17 changes: 9 additions & 8 deletions palace/fem/bilinearform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <mfem.hpp>
#include "fem/integrator.hpp"
#include "fem/libceed/operator.hpp"
#include "linalg/hypre.hpp"

namespace palace
{
Expand Down Expand Up @@ -69,19 +70,19 @@ class BilinearForm
return PartialAssemble(GetTrialSpace(), GetTestSpace());
}

std::unique_ptr<mfem::SparseMatrix> FullAssemble(bool skip_zeros) const
std::unique_ptr<hypre::HypreCSRMatrix> FullAssemble(bool skip_zeros) const
{
return FullAssemble(*PartialAssemble(), skip_zeros, false);
}

static std::unique_ptr<mfem::SparseMatrix> FullAssemble(const ceed::Operator &op,
bool skip_zeros)
static std::unique_ptr<hypre::HypreCSRMatrix> FullAssemble(const ceed::Operator &op,
bool skip_zeros)
{
return FullAssemble(op, skip_zeros, false);
}

static std::unique_ptr<mfem::SparseMatrix> FullAssemble(const ceed::Operator &op,
bool skip_zeros, bool set);
static std::unique_ptr<hypre::HypreCSRMatrix> FullAssemble(const ceed::Operator &op,
bool skip_zeros, bool set);

std::unique_ptr<Operator> Assemble(bool skip_zeros) const;

Expand Down Expand Up @@ -120,13 +121,13 @@ class DiscreteLinearOperator

std::unique_ptr<ceed::Operator> PartialAssemble() const;

std::unique_ptr<mfem::SparseMatrix> FullAssemble(bool skip_zeros) const
std::unique_ptr<hypre::HypreCSRMatrix> FullAssemble(bool skip_zeros) const
{
return BilinearForm::FullAssemble(*PartialAssemble(), skip_zeros, true);
}

static std::unique_ptr<mfem::SparseMatrix> FullAssemble(const ceed::Operator &op,
bool skip_zeros)
static std::unique_ptr<hypre::HypreCSRMatrix> FullAssemble(const ceed::Operator &op,
bool skip_zeros)
{
return BilinearForm::FullAssemble(op, skip_zeros, true);
}
Expand Down
13 changes: 3 additions & 10 deletions palace/fem/libceed/ceed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
#include <string_view>
#include "utils/omp.hpp"

#if defined(MFEM_USE_OPENMP)
#include <omp.h>
#endif

namespace palace::ceed
{

Expand All @@ -31,13 +27,10 @@ void Initialize(const char *resource, const char *jit_source_dir)
{
PalacePragmaOmp(master)
{
#if defined(MFEM_USE_OPENMP)
// Only parallelize libCEED operators over threads when not using the GPU.
const int nt =
!std::string_view(resource).compare(0, 4, "/cpu") ? omp_get_num_threads() : 1;
#else
const int nt = 1;
#endif
const int nt = !std::string_view(resource).compare(0, 4, "/cpu")
? utils::GetNumActiveThreads()
: 1;
internal::ceeds.resize(nt, nullptr);
}
}
Expand Down
Loading

0 comments on commit e40c7c7

Please sign in to comment.