From 86d6055ff2ab537a27786c90247af1076346b8b9 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 25 Oct 2024 17:39:06 +0000
Subject: [PATCH 01/49] Try different ways to compute the affine transformation
 matrix

---
 palace/utils/geodata.cpp | 448 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 444 insertions(+), 4 deletions(-)
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index 65f535e83..f81dd9303 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1720,6 +1720,344 @@ double RebalanceMesh(std::unique_ptr<mfem::ParMesh> &mesh, const IoData &iodata)
 namespace
 {
 
+// Computes the centroid (vertex average) and bounding-box diagonal of a set of mesh vertices.
+void ComputeCentroid(std::unique_ptr<mfem::Mesh> &mesh, const std::unordered_set<int> &vertidxs,
+                     mfem::Vector &centroid, double &diameter)
+{
+  const int sdim = mesh->SpaceDimension();
+  mfem::Vector xMax(sdim), xMin(sdim);
+  xMax = -mfem::infinity();  // Vector(int) leaves its entries uninitialized, so seed
+  xMin = mfem::infinity();   // the bounds before the min/max reduction below.
+  centroid = 0.0;  // The caller must have sized centroid to sdim.
+  for (const int v : vertidxs)
+  {
+    mfem::Vector coord(mesh->GetVertex(v), sdim);  // Non-owning view of the vertex.
+    centroid += coord;
+    for (int j = 0; j < sdim; j++)
+    {
+      xMax[j] = std::max(xMax[j], coord[j]);
+      xMin[j] = std::min(xMin[j], coord[j]);
+    }
+  }
+  centroid /= (double)vertidxs.size();
+  xMax -= xMin;  // Bounding-box extent along each axis.
+  diameter = xMax.Norml2(); // mesh diameter
+}
+
+void ComputeNormal(std::unique_ptr<mfem::Mesh> &periodic_mesh,
+                   const int elem, mfem::Vector &normal,
+                   bool inside, const double norm_tol = 1e-6)
+{
+  int sdim = periodic_mesh->SpaceDimension();
+  // Unit normal of boundary element elem, pointing toward (inside) or away from (!inside) its adjacent element.
+  if (sdim==1) { MFEM_ABORT("Not implemented."); }
+  else if (sdim == 2) { MFEM_ABORT("Not implemented."); }
+  // sdim 1 and 2 abort above; the length-3 vertex views below presumably rely on sdim == 3.
+  mfem::Array<int> vert_bdr, vert_adj;
+  periodic_mesh->GetBdrElementVertices(elem, vert_bdr);
+  mfem::Vector bdr_elem_center(sdim), adj_elem_center(sdim);
+  mfem::Vector bdr_elem_offset_p(sdim), bdr_elem_offset_n(sdim);
+  mfem::Vector p1(sdim), p2(sdim);
+  bdr_elem_center = 0.0;
+  normal = 0.0;
+  for (int j=0; j<vert_bdr.Size(); j++)
+  {
+    mfem::Vector coord(periodic_mesh->GetVertex(vert_bdr[j]), 3);
+    bdr_elem_center += coord;
+    if (j==0) p1 = coord;
+    if (j==1) p2 = coord;
+    if (j>1 and normal.Norml2() < norm_tol)  // Retry with later vertices while the normal is degenerate.
+    {
+      mfem::Vector v1(sdim), v2(sdim);
+      v1 = p2;
+      v1 -= p1;
+      v2 = coord;
+      v2 -= p1;
+      v1.cross3D(v2, normal);  // normal = (p2 - p1) x (coord - p1)
+    }
+  }
+  bdr_elem_center /= vert_bdr.Size();
+  normal /= normal.Norml2();  // NOTE: divides by zero if no non-degenerate vertex triple was found.
+  // Center (vertex average) of the element adjacent to this boundary element.
+  int el, info;
+  periodic_mesh->GetBdrElementAdjacentElement(elem, el, info);
+  periodic_mesh->GetElementVertices(el, vert_adj);
+  adj_elem_center = 0.0;
+  for (int j=0; j<vert_adj.Size(); j++)
+  {
+    mfem::Vector vx(periodic_mesh->GetVertex(vert_adj[j]), 3);
+    adj_elem_center += vx;
+  }
+  adj_elem_center /= vert_adj.Size();
+  // Probe points one unit step along +normal and -normal from the boundary element center.
+  bdr_elem_offset_p = bdr_elem_center;
+  bdr_elem_offset_p += normal;
+  bdr_elem_offset_n = bdr_elem_center;
+  bdr_elem_offset_n -= normal;
+  // The probe point closer to the adjacent element center tells which side of the face that element lies on.
+  if (inside && (adj_elem_center.DistanceTo(bdr_elem_offset_n) <
+                 adj_elem_center.DistanceTo(bdr_elem_offset_p)))
+  {
+    normal *= -1.0;  // Adjacent element is on the -normal side: flip to point toward it.
+  }
+  if (!inside && (adj_elem_center.DistanceTo(bdr_elem_offset_p) <
+                  adj_elem_center.DistanceTo(bdr_elem_offset_n)))
+  {
+    normal *= -1.0;  // Adjacent element is on the +normal side: flip to point away from it.
+  }
+}
+
+// Appends the centroid plus up to two non-collinear vertices whose centroid distance is unique.
+void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
+                      const std::unordered_set<int> &vertidxs,
+                      const mfem::Vector &centroid, const double &diameter,
+                      std::vector<mfem::Vector> &unique_pts,
+                      const double &norm_tol = 1e-6)
+{
+  const int sdim = mesh->SpaceDimension();
+  mfem::Vector coord(sdim);
+  std::unordered_map<int, std::unordered_set<int>> dist2points;
+  for (const int v : vertidxs)
+  {
+    coord = mesh->GetVertex(v);
+    double dist = coord.DistanceTo(centroid);
+    // convert dist to integer to avoid floating differences
+    dist2points[std::round(dist/diameter*1e8)].insert(v);
+  }
+  // Min-heap of (integer distance key, vertex): the top is the smallest key kept so far.
+  std::priority_queue< std::pair<int, int>  ,
+                       std::vector< std::pair<int, int> >,
+                       std::greater <std::pair<int, int> > > q;
+  const std::size_t k = 10; // number of points to keep
+  int num_unique_dist = 0;
+  for (const auto & [ dist, pts_set ] : dist2points)
+  {
+    // Only consider unique distances
+    if (pts_set.size() == 1)
+    {
+      num_unique_dist++;
+      const int v = *pts_set.begin();
+      // Keep the k largest keys: once full, evict the heap minimum for any larger key.
+      if (q.size() < k)
+      {
+        q.push(std::pair<int, int>(dist, v));
+      }
+      else if (q.top().first < dist)
+      {
+        q.pop();
+        q.push(std::pair<int, int>(dist, v));
+      }
+    }
+  }
+
+  Mpi::Print("num unique dist: {:d}, q.size(): {:d}\n", num_unique_dist, q.size());
+  unique_pts.push_back(centroid);
+  mfem::Vector normal(sdim);
+  normal = 0.0;
+  // Pop candidates (smallest retained key first) until three points span a plane.
+  while (q.size() > 0 and normal.Norml2() < norm_tol)
+  {
+    coord = mesh->GetVertex(q.top().second);
+    Mpi::Print("pts: {:d}, x/y/z: {:.3e}, {:.3e}, {:.3e}, dist: {:d}\n", q.top().second, coord[0], coord[1], coord[2], q.top().first);
+    Mpi::Print("dist2points.size(): {:d}\n", dist2points.at(q.top().first).size());
+    q.pop();
+    unique_pts.push_back(coord);
+    if (unique_pts.size() == 3)
+    {
+      // v1 = P2 - P1, v2 = P3 - P1
+      mfem::Vector v1(sdim), v2(sdim);
+      v1 = unique_pts[1];
+      v1 -= unique_pts[0];
+      v2 = unique_pts[2];
+      v2 -= unique_pts[0];
+      v1.cross3D(v2, normal);
+      //Mpi::Print("q.size: {:d}, normal.linf: {:.3e}\n", q.size(), normal.Normlinf());
+      if (normal.Norml2() < norm_tol)
+      {
+        unique_pts.pop_back();  // Collinear with the first two points: try the next one.
+      }
+    }
+  }
+}
+
+void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
+                                 const std::vector<mfem::Vector> &receiver_pts,
+                                 mfem::DenseMatrix &transformation)
+{
+  // Disabled alternative: SVD-based fit
+  // But this assumes known correspondence
+  /*
+  mfem::DenseMatrix Am(3, num_donor_pts);
+  mfem::DenseMatrix Bm(3, num_receiver_pts);
+  int idx = 0;
+  for (const int v : bdr_v_donor)
+  {
+    coord = periodic_mesh->GetVertex(v);
+    Am(0,idx) = coord[0] - donor_centroid[0];
+    Am(1,idx) = coord[1] - donor_centroid[1];
+    Am(2,idx) = coord[2] - donor_centroid[2];
+    idx++;
+  }
+  idx = 0;
+  for (const int v : bdr_v_receiver)
+  {
+    coord = periodic_mesh->GetVertex(v);
+    Bm(0,idx) = coord[0] - receiver_centroid[0];
+    Bm(1,idx) = coord[1] - receiver_centroid[1];
+    Bm(2,idx) = coord[2] - receiver_centroid[2];
+    idx++;
+  }
+  mfem::DenseMatrix H(3);
+  Bm.Transpose();
+  Mult(Am, Bm, H);
+  H.Print();
+  //mfem::DenseMatrixSVD svd(H,'A','A');
+  // Use eigen?
+  */
+  // Solve for the 12 affine coefficients mapping donor_pts[i] to receiver_pts[i], i = 0..3 (the first four
+  // entries of both vectors are read). transformation must already be at least 4x4: entry (3,3) is written.
+    mfem::DenseMatrix A(12);
+    A = 0.0;
+    mfem::Vector rhs(12), affine_coeffs(12);
+    for (int i = 0; i < 4; i++)
+    {
+      // Point i fills rows 3i..3i+2: [x y z 1] against the 4 coefficients of each output row.
+      A(3*i,0) = A(3*i+1,4) = A(3*i+2, 8)  = donor_pts[i][0];
+      A(3*i,1) = A(3*i+1,5) = A(3*i+2, 9)  = donor_pts[i][1];
+      A(3*i,2) = A(3*i+1,6) = A(3*i+2, 10) = donor_pts[i][2];
+      A(3*i,3) = A(3*i+1,7) = A(3*i+2, 11) = 1.0;
+      rhs[3*i+0] = receiver_pts[i][0];
+      rhs[3*i+1] = receiver_pts[i][1];
+      rhs[3*i+2] = receiver_pts[i][2];
+    }
+    Mpi::Print("Donor pts matrix:\n");
+    A.Print();
+    Mpi::Print("Receiver pts RHS:\n");
+    rhs.Print();
+    A.Invert(); // Invert in place
+    // coeffs = A^-1 rhs
+    A.Mult(rhs, affine_coeffs);
+    Mpi::Print("affine coeffs:\n");
+    affine_coeffs.Print();
+    // Build affine transformation matrix
+    transformation = 0.0;
+    for (int i = 0; i < 3; i++)
+    {
+      for (int j = 0; j < 4; j++)
+      {
+        transformation(i,j) = affine_coeffs[i*4+j];  // Coefficients are stored row by row.
+      }
+    }
+    transformation(3,3) = 1.0;  // Last row becomes [0 0 0 1].
+    Mpi::Print("Affine transform matrix:\n");
+    transformation.Print();
+}
+
+void ComputeRotation(const mfem::Vector &normal1,
+                     const mfem::Vector &normal2,
+                     mfem::DenseMatrix &transformation)
+{
+  // Rodrigues' formula R = I + [v]x + [v]x^2 / (1 + c), with v = n1 x n2 and c = n1 . n2, which
+  // rotates n1 onto n2 for unit-length inputs. R goes in the upper-left 3x3 block of transformation.
+  mfem::DenseMatrix R(3), vx(3), vx2(3);
+  mfem::Vector v(normal1.Size());
+  normal1.cross3D(normal2, v);
+  const double c = normal1*normal2;
+  // 1 + c vanishes for antiparallel normals, where the rotation axis is not unique.
+  MFEM_VERIFY(1.0 + c > 1e-12, "Cannot compute rotation between antiparallel normals.");
+  vx(0,1) = -v[2];
+  vx(0,2) = v[1];
+  vx(1,0) = v[2];
+  vx(1,2) = -v[0];
+  vx(2,0) = -v[1];
+  vx(2,1) = v[0];
+
+  R(0,0) = R(1,1) = R(2,2) = 1.0;
+  R += vx;
+  Mult(vx, vx, vx2);
+  vx2.Set(1.0/(1.0+c), vx2);
+  R += vx2;
+  vx.Print();
+  Mpi::Print("R\n");
+  R.Print();
+  // Only the 3x3 rotation block is written; other entries of transformation are left alone.
+  for(int i = 0; i < 3; i++)
+  {
+    for(int j = 0; j < 3; j++)
+    {
+      transformation(i,j) = R(i,j);
+    }
+  }
+}
+
+std::vector<int> CreatePeriodicVertexMapping(
+  std::unique_ptr<mfem::Mesh> &mesh,
+  const std::unordered_set<int> &donor_v,
+  const std::unordered_set<int> &receiver_v,
+  const mfem::DenseMatrix &transform,
+  double tol = 1e-6)
+{
+  const int sdim = mesh->SpaceDimension();
+  // Returns a vertex map of size GetNV(): identity, except each matched receiver vertex maps to its donor.
+  mfem::Vector coord(sdim), at(sdim), dx(sdim);
+
+  // Similar to MFEM's CreatePeriodicVertexMapping
+  // maps from replica to primary vertex
+  std::unordered_map<int, int> replica2primary;
+
+  // KD-tree containing all the receiver points
+  std::unique_ptr<mfem::KDTreeBase<int,double>> kdtree;
+  if (sdim == 1) { kdtree.reset(new mfem::KDTree1D); }
+  else if (sdim == 2) { kdtree.reset(new mfem::KDTree2D); }
+  else if (sdim == 3) { kdtree.reset(new mfem::KDTree3D); }
+  else { MFEM_ABORT("Invalid space dimension."); }
+
+  // Add all receiver points to KD-tree
+  for (const int v : receiver_v)
+  {
+    kdtree->AddPoint(mesh->GetVertex(v), v);
+  }
+  kdtree->Sort();
+
+  // Loop over donor points and find the corresponding receiver point
+  for (int vi : donor_v)
+  {
+    mfem::Vector donor_coord(4), receiver_coord(4);
+    donor_coord[3] = 1.0;  // Homogeneous coordinate for the 4x4 transform.
+    coord.MakeRef(donor_coord, 0);
+    at.MakeRef(receiver_coord, 0);
+    // coord/at presumably alias the leading sdim entries of the 4-vectors above -- confirm MakeRef semantics.
+    coord = mesh->GetVertex(vi);
+    //Mpi::Print("Mapping donor point: {:d} ({:.3e}, {:.3e}, {:.3e})", vi, donor_coord[0], donor_coord[1], donor_coord[2]);
+    // Apply transformation
+    // receiver = transform * donor
+    transform.Mult(donor_coord, receiver_coord);
+
+    const int vj = kdtree->FindClosestPoint(at.GetData());
+    coord = mesh->GetVertex(vj);  // Reuses coord (overwriting the donor position) for the matched receiver.
+    dx = at;
+    dx -= coord;
+    //Mpi::Print(" to receiver point: {:d} ({:.3e}, {:.3e}, {:.3e}), with transform error {:.3e}\n", vj, receiver_coord[0], receiver_coord[1], receiver_coord[2], dx.Norml2());
+
+    MFEM_VERIFY(dx.Norml2() < tol, "Could not match points on periodic boundaries.");
+
+    MFEM_VERIFY(replica2primary.find(vj) == replica2primary.end(), "Could not match points on periodic boundaries, multiple donor points map to the same receiver point.")
+
+    replica2primary[vj] = vi;  // Receiver (replica) -> donor (primary).
+
+  }
+  // Start from the identity map, then redirect every matched receiver vertex to its donor.
+  std::vector<int> v2v(mesh->GetNV());
+  for (int i = 0; i < v2v.size(); i++)
+  {
+    v2v[i] = i;
+  }
+  for (const auto &r2p : replica2primary)
+  {
+    v2v[r2p.first] = r2p.second;
+  }
+
+  return v2v;
+}
+
 std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_curvature,
                                      const config::BoundaryData &boundaries)
 {
@@ -1774,10 +2112,112 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
     auto periodic_mesh = std::move(mesh);
     for (const auto &data : boundaries.periodic)
     {
-      mfem::Vector translation(data.translation.size());
-      std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
-      auto periodic_mapping =
-          periodic_mesh->CreatePeriodicVertexMapping({translation}, 1E-6);
+      // Compute the translation vector between donor and receiver boundaries.
+      const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
+      const int sdim = periodic_mesh->SpaceDimension();
+      mfem::Vector coord(sdim), donor_centroid(sdim), receiver_centroid(sdim);
+      mfem::Vector translation2(sdim);
+
+      // test
+      mfem::Vector donor_normal(sdim), receiver_normal(sdim);
+      donor_normal = receiver_normal = 0.0;
+      std::unordered_set<int> bdr_v_donor, bdr_v_receiver;
+      std::unordered_set<int> bdr_e_donor, bdr_e_receiver;
+      for (int be = 0; be < periodic_mesh->GetNBE(); be++)
+      {
+        int attr = periodic_mesh->GetBdrAttribute(be);
+        auto donor = std::find(da.begin(), da.end(), attr) != da.end();
+        auto receiver = std::find(ra.begin(), ra.end(), attr) != ra.end();
+        if (donor || receiver)
+        {
+          if (donor) bdr_e_donor.insert(be);
+          if (receiver) bdr_e_receiver.insert(be);
+          mfem::Array<int> vertidxs;
+          //int f, o;
+          //periodic_mesh->GetBdrElementFace(be, &f, &o);
+          //periodic_mesh->GetFaceVertices(f, vertidxs);
+          //Mpi::Print("f: {:d}, o: {:d}\n", f, o);
+          periodic_mesh->GetBdrElementVertices(be, vertidxs);
+          for (int i = 0; i < vertidxs.Size(); i++)
+          {
+            coord = periodic_mesh->GetVertex(vertidxs[i]);
+            if (donor)
+            {
+              bdr_v_donor.insert(vertidxs[i]);
+            }
+            else if (receiver)
+            {
+              bdr_v_receiver.insert(vertidxs[i]);
+            }
+          }
+        }
+      }
+      double donor_dia, receiver_dia, diameter;
+      Mpi::Print("num donor/receiver pts {:d}, {:d}\n",bdr_v_donor.size(), bdr_v_receiver.size());
+      MFEM_VERIFY(bdr_v_donor.size() == bdr_v_receiver.size(), "Different number of vertices on donor and receiver boundaries. Cannot create periodic mesh.");
+      ComputeCentroid(periodic_mesh, bdr_v_donor, donor_centroid, donor_dia);
+      Mpi::Print("Donor centroid: {:.3e}, {:.3e}, {:.3e}\n", donor_centroid[0], donor_centroid[1], donor_centroid[2]);
+      ComputeCentroid(periodic_mesh, bdr_v_receiver, receiver_centroid, receiver_dia);
+      Mpi::Print("Receiver centroid: {:.3e}, {:.3e}, {:.3e}\n", receiver_centroid[0], receiver_centroid[1], receiver_centroid[2]);
+      translation2 = receiver_centroid;
+      translation2 -= donor_centroid;
+      Mpi::Print("computed translation: {:.9e}, {:.9e}, {:.9e}\n", translation2[0], translation2[1], translation2[2]);
+      Mpi::Print("config translation: {:.9e}, {:.9e}, {:.9e}\n", data.translation[0], data.translation[1], data.translation[2]);
+
+      diameter = std::max(donor_dia, receiver_dia);
+      const double norm_tol = 1e-6 * diameter;
+      // Compute normal so it points inside domain for donor and outside for receiver
+      ComputeNormal(periodic_mesh, *bdr_e_donor.begin(), donor_normal, true, norm_tol);
+      ComputeNormal(periodic_mesh, *bdr_e_receiver.begin(), receiver_normal, false, norm_tol);
+      Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0], donor_normal[1], donor_normal[2]);
+      Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0], receiver_normal[1], receiver_normal[2]);
+
+      std::vector<mfem::Vector> donor_pts, receiver_pts;
+      FindUniquePoints(periodic_mesh, bdr_v_donor, donor_centroid, diameter, donor_pts, norm_tol);
+      FindUniquePoints(periodic_mesh, bdr_v_receiver, receiver_centroid, diameter, receiver_pts, norm_tol);
+
+      // Add point offset from centroid in normal direction
+      donor_centroid += donor_normal;
+      receiver_centroid += receiver_normal;
+      donor_pts.push_back(donor_centroid);
+      receiver_pts.push_back(receiver_centroid);
+
+      Mpi::Print("Number of unique donor pts: {:d}\n", donor_pts.size());
+      Mpi::Print("Number of unique receiver pts: {:d}\n", receiver_pts.size());
+
+      MFEM_VERIFY(donor_pts.size() == receiver_pts.size(), "Different number of unique points on donor and receiver boundaries.");
+
+      mfem::DenseMatrix transformation(4);
+      if(donor_pts.size() == 4)
+      {
+        ComputeAffineTransformation(donor_pts, receiver_pts,
+                                    transformation);
+      }
+      else if (donor_pts.size() == 2)
+      {
+        // Use normals to compute a rotation matrix
+        ComputeRotation(donor_normal, receiver_normal,
+                        transformation);
+        // Use add centroids translation to transform matrix
+        transformation(0,3) = translation2[0];
+        transformation(1,3) = translation2[1];
+        transformation(2,3) = translation2[2];
+        transformation(3,3) = 1.0;
+        Mpi::Print("Affine transformation matrix\n");
+        transformation.Print();
+      }
+
+      auto periodic_mapping = CreatePeriodicVertexMapping(periodic_mesh,
+                                                 bdr_v_donor,
+                                                 bdr_v_receiver,
+                                                 transformation,
+                                                 norm_tol);
+
+
+      //mfem::Vector translation(data.translation.size());
+      //std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
+      //auto periodic_mapping =
+      //    periodic_mesh->CreatePeriodicVertexMapping({translation2}, 1E-6);
       auto p_mesh = std::make_unique<mfem::Mesh>(
           mfem::Mesh::MakePeriodic(*periodic_mesh, periodic_mapping));
       periodic_mesh = std::move(p_mesh);

From 31a4a9b2b857f4f7e256ea3d9c6b063a724f2da6 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 29 Oct 2024 17:43:55 +0000
Subject: [PATCH 02/49] Add periodic boundary operator class for
 periodic/floquet BCs

---
 palace/models/CMakeLists.txt               |   1 +
 palace/models/periodicboundaryoperator.cpp | 196 +++++++++++++++++++
 palace/models/periodicboundaryoperator.hpp |  51 +++++
 palace/models/spaceoperator.cpp            | 211 +++++++++++++++++++--
 palace/models/spaceoperator.hpp            |  18 +-
 palace/utils/configfile.cpp                |  11 +-
 palace/utils/configfile.hpp                |   4 +
 palace/utils/geodata.cpp                   |   7 +-
 8 files changed, 478 insertions(+), 21 deletions(-)
 create mode 100644 palace/models/periodicboundaryoperator.cpp
 create mode 100644 palace/models/periodicboundaryoperator.hpp

diff --git a/palace/models/CMakeLists.txt b/palace/models/CMakeLists.txt
index 516afab65..c34824487 100644
--- a/palace/models/CMakeLists.txt
+++ b/palace/models/CMakeLists.txt
@@ -10,6 +10,7 @@ target_sources(${LIB_TARGET_NAME}
   ${CMAKE_CURRENT_SOURCE_DIR}/curlcurloperator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/domainpostoperator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/farfieldboundaryoperator.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/periodicboundaryoperator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/laplaceoperator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/lumpedportoperator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/materialoperator.cpp
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
new file mode 100644
index 000000000..0fbf0d9db
--- /dev/null
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -0,0 +1,196 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "periodicboundaryoperator.hpp"
+
+#include <set>
+#include "linalg/densematrix.hpp"
+#include "models/materialoperator.hpp"
+#include "utils/communication.hpp"
+#include "utils/geodata.hpp"
+#include "utils/iodata.hpp"
+#include "utils/prettyprint.hpp"
+
+namespace palace
+{
+
+PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
+                                                   const MaterialOperator &mat_op,
+                                                   const mfem::ParMesh &mesh)
+  : mat_op(mat_op), periodic_attr(SetUpBoundaryProperties(iodata, mesh))
+{
+  // NOTE(review): SetUpBoundaryProperties() above also assigns wave_vector and wave_vector_cross, which is only valid if those members are declared before periodic_attr.
+  if (periodic_attr.Size())  // Print out BC info for all periodic attributes.
+  {
+    Mpi::Print("\nConfiguring periodic BC at attributes:\n");
+    std::sort(periodic_attr.begin(), periodic_attr.end());
+    utils::PrettyPrint(periodic_attr);
+  }
+}
+
+mfem::Array<int>
+PeriodicBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
+                                                  const mfem::ParMesh &mesh)
+{
+  // Check that periodic boundary attributes have been specified correctly.
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> bdr_attr_marker;  // 1 at index attr - 1 for every attribute present in the mesh.
+  if (!iodata.boundaries.periodic.empty())
+  {
+    bdr_attr_marker.SetSize(bdr_attr_max);
+    bdr_attr_marker = 0;
+    for (auto attr : mesh.bdr_attributes)
+    {
+      bdr_attr_marker[attr - 1] = 1;
+    }
+    std::set<int> bdr_warn_list;  // Configured attributes that are out of range or absent from the mesh.
+    for (const auto &data : iodata.boundaries.periodic)
+    {
+      const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
+      for (const auto attr : da)
+      {
+        if (attr <= 0 || attr > bdr_attr_max || !bdr_attr_marker[attr - 1])
+        {
+          bdr_warn_list.insert(attr);
+        }
+      }
+      for (const auto attr : ra)
+      {
+        if (attr <= 0 || attr > bdr_attr_max || !bdr_attr_marker[attr - 1])
+        {
+          bdr_warn_list.insert(attr);
+        }
+      }
+    }
+    if (!bdr_warn_list.empty())
+    {
+      Mpi::Print("\n");
+      Mpi::Warning(
+          "Unknown periodic boundary attributes!\nSolver will just ignore them!");
+      utils::PrettyPrint(bdr_warn_list, "Boundary attribute list:");
+      Mpi::Print("\n");
+    }
+  }
+
+  // Mark selected boundary attributes from the mesh as periodic.
+  // TODO(review): open question from the author -- is this attribute list actually useful?
+  mfem::Array<int> periodic_bcs;
+  for (const auto &data : iodata.boundaries.periodic)
+  {
+    const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
+    for (const auto attr : da)
+    {
+      if (attr <= 0 || attr > bdr_attr_max || !bdr_attr_marker[attr - 1])
+      {
+        continue;  // Can just ignore if wrong
+      }
+      periodic_bcs.Append(attr);
+    }
+    for (const auto attr : ra)
+    {
+      if (attr <= 0 || attr > bdr_attr_max || !bdr_attr_marker[attr - 1])
+      {
+        continue;  // Can just ignore if wrong
+      }
+      periodic_bcs.Append(attr);
+    }
+
+    // TODO(review): there should probably be a single wave vector for the whole simulation rather than
+    // one per periodic pair; as written each pair overwrites the previous one. Move outside the loop?
+    MFEM_VERIFY(data.wave_vector.size() == mesh.SpaceDimension(),
+    "Block wave vector size must equal the spatial dimension.");
+    wave_vector.SetSize(data.wave_vector.size());
+    std::copy(data.wave_vector.begin(), data.wave_vector.end(), wave_vector.GetData());
+    MFEM_VERIFY(periodic_bcs.Size() == 0 ||
+                wave_vector.Normlinf() < std::numeric_limits<double>::epsilon() ||
+                iodata.problem.type == config::ProblemData::Type::DRIVEN ||
+                iodata.problem.type == config::ProblemData::Type::EIGENMODE,
+                "Quasi-periodic Floquet boundary conditions are only available for "
+                " frequency domain driven or eigenmode simulations!");
+    // The check above permits a nonzero wave vector only for driven or eigenmode problems.
+    // Side effect: the members wave_vector and wave_vector_cross are assigned here.
+    // Matrix representation of cross product with wave vector
+    // [k x] = | 0  -k3  k2|
+    //         | k3  0  -k1|
+    //         |-k2  k1  0 |
+    wave_vector_cross.SetSize(3); // assumes 3D? (indexes wave_vector[2] below)
+    wave_vector_cross(0,1) = -wave_vector[2];
+    wave_vector_cross(0,2) = wave_vector[1];
+    wave_vector_cross(1,0) = wave_vector[2];
+    wave_vector_cross(1,2) = -wave_vector[0];
+    wave_vector_cross(2,0) = -wave_vector[1];
+    wave_vector_cross(2,1) = wave_vector[0];
+  }
+  // Valid donor and receiver attributes of every periodic pair, in configuration order.
+  return periodic_bcs;
+}
+
+void PeriodicBoundaryOperator::AddRealMassCoefficients(double coeff,
+                                                       MaterialPropertyCoefficient &f)
+{
+  // Adds coeff * [k x]^T mu^-1 [k x] to f; does nothing when there are no periodic attributes.
+  if (periodic_attr.Size())
+  {
+    // [k x]^T 1/mu [k x]
+    mfem::DenseTensor kx(mat_op.GetInvPermeability().SizeI(),
+                         mat_op.GetInvPermeability().SizeJ(),
+                         mat_op.GetInvPermeability().SizeK());
+    mfem::DenseTensor kxT(kx.SizeI(), kx.SizeJ(), kx.SizeK());
+    for (int k = 0; k < kx.SizeK(); k++)
+    {
+      // Same [k x] in every slice; assumes the slices are 3x3 like wave_vector_cross -- TODO confirm.
+      kx(k)  = wave_vector_cross;
+      kxT(k).Transpose(wave_vector_cross);
+    }
+    mfem::DenseTensor kxTmuinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
+    kxTmuinvkx = linalg::Mult(kxT, kxTmuinvkx);
+    MaterialPropertyCoefficient kxTmuinvkx_func(mat_op.GetAttributeToMaterial(), kxTmuinvkx);
+    //muinvkx_func.RestrictCoefficient
+    f.AddCoefficient(kxTmuinvkx_func.GetAttributeToMaterial(),
+                     kxTmuinvkx_func.GetMaterialProperties(), coeff);
+  }
+}
+
+void PeriodicBoundaryOperator::AddWeakCurlCoefficients(double coeff,
+                                                       MaterialPropertyCoefficient &f)
+{
+  // Adds coeff * mu^-1 [k x] to f; does nothing when there are no periodic attributes.
+  if (periodic_attr.Size())
+  {
+    // 1/mu [k x]
+    mfem::DenseTensor kx(mat_op.GetInvPermeability().SizeI(),
+                         mat_op.GetInvPermeability().SizeJ(),
+                         mat_op.GetInvPermeability().SizeK());
+    for (int k = 0; k < kx.SizeK(); k++)
+    {
+      kx(k)  = wave_vector_cross;  // Same [k x] in every slice of the tensor.
+    }
+    mfem::DenseTensor muinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
+    MaterialPropertyCoefficient muinvkx_func(mat_op.GetAttributeToMaterial(), muinvkx);
+    //muinvkx_func.RestrictCoefficient
+    f.AddCoefficient(muinvkx_func.GetAttributeToMaterial(),
+                     muinvkx_func.GetMaterialProperties(), coeff);
+  }
+}
+
+void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
+                                                   MaterialPropertyCoefficient &f)
+{
+  // Adds coeff * [k x]^T mu^-1 to f; does nothing when there are no periodic attributes.
+  if (periodic_attr.Size())
+  {
+    // [k x]^T 1/mu
+    mfem::DenseTensor kxT(mat_op.GetInvPermeability().SizeI(),
+                          mat_op.GetInvPermeability().SizeJ(),
+                          mat_op.GetInvPermeability().SizeK());
+    for (int k = 0; k < kxT.SizeK(); k++)
+    {
+      kxT(k).Transpose(wave_vector_cross);  // Same [k x]^T in every slice of the tensor.
+    }
+    mfem::DenseTensor kxTmuinv = linalg::Mult(kxT, mat_op.GetInvPermeability());
+    MaterialPropertyCoefficient kxTmuinv_func(mat_op.GetAttributeToMaterial(), kxTmuinv);
+    //muinvkx_func.RestrictCoefficient
+    f.AddCoefficient(kxTmuinv_func.GetAttributeToMaterial(),
+                     kxTmuinv_func.GetMaterialProperties(), coeff);
+  }
+}
+}  // namespace palace
diff --git a/palace/models/periodicboundaryoperator.hpp b/palace/models/periodicboundaryoperator.hpp
new file mode 100644
index 000000000..2da6fa332
--- /dev/null
+++ b/palace/models/periodicboundaryoperator.hpp
@@ -0,0 +1,51 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_MODELS_PERIODIC_BOUNDARY_OPERATOR_HPP
+#define PALACE_MODELS_PERIODIC_BOUNDARY_OPERATOR_HPP
+
+#include <mfem.hpp>
+
+namespace palace
+{
+
+class IoData;
+class MaterialOperator;
+class MaterialPropertyCoefficient;
+
+//
+// A class handling periodic boundaries.
+//
+class PeriodicBoundaryOperator
+{
+private:
+  // Reference to material property data (not owned).
+  const MaterialOperator &mat_op;
+
+  // Bloch wave vector for Floquet boundary conditions.
+  mfem::Vector wave_vector;
+  // Matrix representation of the cross product with wave_vector, [k x].
+  mfem::DenseMatrix wave_vector_cross;
+
+  // List of all periodic boundary condition attributes. Must stay declared after the wave
+  // vector members: its initializer, SetUpBoundaryProperties(), assigns both of them.
+  mfem::Array<int> periodic_attr;
+
+  mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
+
+public:
+  PeriodicBoundaryOperator(const IoData &iodata, const MaterialOperator &mat_op,
+                           const mfem::ParMesh &mesh);
+
+  // Returns array of periodic BC attributes.
+  const auto &GetAttrList() const { return periodic_attr; }
+
+  // Add contributions to system matrices
+  void AddRealMassCoefficients(double coeff, MaterialPropertyCoefficient &f);
+  void AddWeakCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
+  void AddCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
+};
+
+}  // namespace palace
+
+#endif  // PALACE_MODELS_PERIODIC_BOUNDARY_OPERATOR_HPP
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index b15c5c70d..8b8acafac 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -45,6 +45,7 @@ SpaceOperator::SpaceOperator(const IoData &iodata,
         iodata.solver.linear.estimator_mg ? iodata.solver.linear.mg_max_levels : 1, mesh,
         rt_fecs)),
     mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()),
+    periodic_op(iodata, mat_op, *mesh.back()),
     surf_sigma_op(iodata, mat_op, *mesh.back()), surf_z_op(iodata, mat_op, *mesh.back()),
     lumped_port_op(iodata, mat_op, *mesh.back()),
     wave_port_op(iodata, mat_op, GetNDSpace(), GetH1Space()),
@@ -119,6 +120,7 @@ void SpaceOperator::CheckBoundaryProperties()
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   const auto dbc_marker = mesh::AttrToMarker(bdr_attr_max, dbc_attr);
   const auto farfield_marker = mesh::AttrToMarker(bdr_attr_max, farfield_op.GetAttrList());
+  const auto periodic_marker = mesh::AttrToMarker(bdr_attr_max, periodic_op.GetAttrList());
   const auto surf_sigma_marker =
       mesh::AttrToMarker(bdr_attr_max, surf_sigma_op.GetAttrList());
   const auto surf_z_Rs_marker = mesh::AttrToMarker(bdr_attr_max, surf_z_op.GetRsAttrList());
@@ -133,7 +135,7 @@ void SpaceOperator::CheckBoundaryProperties()
   for (int i = 0; i < dbc_marker.Size(); i++)
   {
     aux_bdr_marker[i] =
-        (dbc_marker[i] || farfield_marker[i] || surf_sigma_marker[i] ||
+        (dbc_marker[i] || farfield_marker[i] || periodic_marker[i] || surf_sigma_marker[i] ||
          surf_z_Rs_marker[i] || surf_z_Ls_marker[i] || lumped_port_Rs_marker[i] ||
          lumped_port_Ls_marker[i] || wave_port_marker[i]);
     if (aux_bdr_marker[i])
@@ -157,7 +159,7 @@ void SpaceOperator::CheckBoundaryProperties()
   const auto surf_j_marker = mesh::AttrToMarker(bdr_attr_max, surf_j_op.GetAttrList());
   for (int i = 0; i < dbc_marker.Size(); i++)
   {
-    MFEM_VERIFY(dbc_marker[i] + farfield_marker[i] + surf_sigma_marker[i] +
+    MFEM_VERIFY(dbc_marker[i] + farfield_marker[i] + periodic_marker[i] + surf_sigma_marker[i] +
                         surf_z_marker[i] + lumped_port_marker[i] + wave_port_marker[i] +
                         surf_j_marker[i] <=
                     1,
@@ -260,6 +262,23 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   }
 }
 
+void AddMixedIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
+                         const MaterialPropertyCoefficient *fw, bool assemble_q_data = false)
+{  // Registers the optional curl (f) and weak-curl (fw) domain integrators on a.
+  if (f && !f->empty())  // Null or empty coefficients are skipped.
+  {
+    a.AddDomainIntegrator<MixedVectorCurlIntegrator>(*f);
+  }
+  if (fw && !fw->empty())
+  {
+    a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(*fw);
+  }
+  if (assemble_q_data)  // Optional; off by default.
+  {
+    a.AssembleQuadratureData();
+  }
+}
+
 auto AssembleOperator(const FiniteElementSpace &fespace,
                       const MaterialPropertyCoefficient *df,
                       const MaterialPropertyCoefficient *f,
@@ -294,6 +313,18 @@ auto AssembleAuxOperators(const FiniteElementSpaceHierarchy &fespaces,
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
+/*
+// Add fp, fpw logic to AssembleOperators and AddIntegrators instead
+auto AssembleMixedOperators(const FiniteElementSpaceHierarchy &fespaces,
+                            const MaterialPropertyCoefficient *fp,
+                            const MaterialPropertyCoefficient *fpw, bool skip_zeros = false,
+                            bool assemble_q_data = false, std::size_t l0 = 0)
+{
+  BilinearForm a(fespaces.GetFinestFESpace());
+  AddMixedIntegrators(a, fp, fpw, assemble_q_data);
+  return a.Assemble(fespaces, skip_zeros, l0); //can't use assemble when trial and test spaces differ
+}
+*/
 }  // namespace
 
 template <typename OperType>
@@ -442,12 +473,82 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   }
 }
 
+// Move some of this inside AssembleOperator(s)? AssembleMixedOperator(s)?
+template <typename OperType>
+std::unique_ptr<OperType>
+SpaceOperator::GetPeriodicWeakCurlMatrix()
+{
+  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
+  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
+  periodic_op.AddWeakCurlCoefficients(1.0, f);
+  int empty = (f.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
+  {
+    return {};
+  }
+  constexpr bool skip_zeros = false, assemble_q_data = false;
+  //BilinearForm a(GetNDSpace(), GetNDSpace()); //? which spaces and what order
+  BilinearForm a(GetNDSpace());//test
+  a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(f);
+  if (assemble_q_data)
+  {
+    a.AssembleQuadratureData();
+  }
+  auto weakCurl = a.Assemble(skip_zeros);
+  if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  {
+    auto WeakCurl = std::make_unique<ComplexParOperator>(std::move(weakCurl),nullptr, GetNDSpace(), GetNDSpace(),false);
+    return WeakCurl;
+  }
+  else
+  {
+    auto WeakCurl = std::make_unique<ParOperator>(std::move(weakCurl),GetNDSpace(), GetNDSpace(), false);
+    return WeakCurl;
+  }
+}
+
+template <typename OperType>
+std::unique_ptr<OperType>
+SpaceOperator::GetPeriodicCurlMatrix()
+{
+  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
+  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
+  periodic_op.AddCurlCoefficients(1.0, f);
+  int empty = (f.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
+  {
+    return {};
+  }
+  constexpr bool skip_zeros = false, assemble_q_data = false;
+  //BilinearForm a(GetNDSpace(), GetNDSpace()); //? which spaces and what order?
+  BilinearForm a(GetNDSpace());//test
+  a.AddDomainIntegrator<MixedVectorCurlIntegrator>(f);
+  if (assemble_q_data)
+  {
+    a.AssembleQuadratureData();
+  }
+  auto curl = a.Assemble(skip_zeros);
+  if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  {
+    auto Curl = std::make_unique<ComplexParOperator>(std::move(curl),nullptr, GetNDSpace(), GetNDSpace(),false);
+    return Curl;
+  }
+  else
+  {
+    auto Curl = std::make_unique<ParOperator>(std::move(curl),GetNDSpace(), GetNDSpace(), false);
+    return Curl;
+  }
+}
+
 namespace
 {
 
 auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
                          const ParOperator *K, const ParOperator *C, const ParOperator *M,
-                         const ParOperator *A2, const FiniteElementSpace &fespace)
+                         const ParOperator *A2, double a4, double a5, const ParOperator *P1,
+                         const ParOperator *P2, const FiniteElementSpace &fespace)
 {
   auto sum = std::make_unique<SumOperator>(h, w);
   if (K && a0 != 0.0)
@@ -466,13 +567,23 @@ auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
   {
     sum->AddOperator(A2->LocalOperator(), 1.0);
   }
+  if (P1)
+  {
+    sum->AddOperator(P1->LocalOperator(), a4);
+  }
+  if (P2)
+  {
+    sum->AddOperator(P2->LocalOperator(), a5);
+  }
   return std::make_unique<ParOperator>(std::move(sum), fespace);
 }
 
 auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<double> a1,
                          std::complex<double> a2, const ComplexParOperator *K,
                          const ComplexParOperator *C, const ComplexParOperator *M,
-                         const ComplexParOperator *A2, const FiniteElementSpace &fespace)
+                         const ComplexParOperator *A2, std::complex<double> a4,
+                         std::complex<double> a5, const ComplexParOperator *P1,
+                         const ComplexParOperator *P2, const FiniteElementSpace &fespace)
 {
   // Block 2 x 2 equivalent-real formulation for each term in the sum:
   //                    [ sumr ]  +=  [ ar  -ai ] [ Ar ]
@@ -565,6 +676,56 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
       sumi->AddOperator(*A2->LocalOperator().Imag(), 1.0);
     }
   }
+  if (P1 && a4 != 0.0)
+  {
+    if (a4.real() != 0.0)
+    {
+      if (P1->LocalOperator().Real())
+      {
+        sumr->AddOperator(*P1->LocalOperator().Real(), a4.real());
+      }
+      if (P1->LocalOperator().Imag())
+      {
+        sumi->AddOperator(*P1->LocalOperator().Imag(), a4.real());
+      }
+    }
+    if (a4.imag() != 0.0)
+    {
+      if (P1->LocalOperator().Imag())
+      {
+        sumr->AddOperator(*P1->LocalOperator().Imag(), -a4.imag());
+      }
+      if (P1->LocalOperator().Real())
+      {
+        sumi->AddOperator(*P1->LocalOperator().Real(), a4.imag());
+      }
+    }
+  }
+  if (P2 && a5 != 0.0)
+  {
+    if (a5.real() != 0.0)
+    {
+      if (P2->LocalOperator().Real())
+      {
+        sumr->AddOperator(*P2->LocalOperator().Real(), a5.real());
+      }
+      if (P2->LocalOperator().Imag())
+      {
+        sumi->AddOperator(*P2->LocalOperator().Imag(), a5.real());
+      }
+    }
+    if (a5.imag() != 0.0)
+    {
+      if (P2->LocalOperator().Imag())
+      {
+        sumr->AddOperator(*P2->LocalOperator().Imag(), -a5.imag());
+      }
+      if (P2->LocalOperator().Real())
+      {
+        sumi->AddOperator(*P2->LocalOperator().Real(), a5.imag());
+      }
+    }
+  }
   return std::make_unique<ComplexParOperator>(std::move(sumr), std::move(sumi), fespace);
 }
 
@@ -574,7 +735,8 @@ template <typename OperType, typename ScalarType>
 std::unique_ptr<OperType>
 SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
                                const OperType *K, const OperType *C, const OperType *M,
-                               const OperType *A2)
+                               const OperType *A2, ScalarType a4, ScalarType a5,
+                               const OperType *P1, const OperType *P2)
 {
   using ParOperType =
       typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
@@ -584,7 +746,10 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
   const auto *PtAP_C = (C) ? dynamic_cast<const ParOperType *>(C) : nullptr;
   const auto *PtAP_M = (M) ? dynamic_cast<const ParOperType *>(M) : nullptr;
   const auto *PtAP_A2 = (A2) ? dynamic_cast<const ParOperType *>(A2) : nullptr;
-  MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2),
+  const auto *PtAP_P1 = (P1) ? dynamic_cast<const ParOperType *>(P1) : nullptr;
+  const auto *PtAP_P2 = (P2) ? dynamic_cast<const ParOperType *>(P2) : nullptr;
+  MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2)
+               && (!P1 || PtAP_P1) && (!P2 || PtAP_P2),
               "SpaceOperator requires ParOperator or ComplexParOperator for system matrix "
               "construction!");
 
@@ -609,10 +774,21 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
     height = PtAP_A2->LocalOperator().Height();
     width = PtAP_A2->LocalOperator().Width();
   }
+  else if (PtAP_P1)
+  {
+    height = PtAP_P1->LocalOperator().Height();
+    width = PtAP_P1->LocalOperator().Width();
+  }
+  else if (PtAP_P2)
+  {
+    height = PtAP_P2->LocalOperator().Height();
+    width = PtAP_P2->LocalOperator().Width();
+  }
   MFEM_VERIFY(height >= 0 && width >= 0,
               "At least one argument to GetSystemMatrix must not be empty!");
 
   auto A = BuildParSumOperator(height, width, a0, a1, a2, PtAP_K, PtAP_C, PtAP_M, PtAP_A2,
+                               a4, a5, PtAP_P1, PtAP_P2,
                                GetNDSpace());
   A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE);
   return A;
@@ -689,7 +865,8 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&br,
 
 template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, double a1,
-                                                                 double a2, double a3)
+                                                                 double a2, double a3,
+                                                                 double a4, double a5)
 {
   // XX TODO: Handle complex coeff a0/a1/a2/a3 (like GetSystemMatrix)
 
@@ -723,6 +900,8 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddImagMassCoefficients(a2, fi);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
+    //periodic_op.AddWeakCurlCoefficients(a4, fwi);
+    //periodic_op.AddCurlCoefficients(a5, fi);
     int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()),
                     (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty())};
     Mpi::GlobalMin(2, empty, GetComm());
@@ -854,6 +1033,7 @@ void SpaceOperator::AddDampingBdrCoefficients(double coeff, MaterialPropertyCoef
 void SpaceOperator::AddRealMassCoefficients(double coeff, MaterialPropertyCoefficient &f)
 {
   f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityReal(), coeff);
+  periodic_op.AddRealMassCoefficients(coeff, f);
 }
 
 void SpaceOperator::AddRealMassBdrCoefficients(double coeff,
@@ -876,6 +1056,7 @@ void SpaceOperator::AddImagMassCoefficients(double coeff, MaterialPropertyCoeffi
 void SpaceOperator::AddAbsMassCoefficients(double coeff, MaterialPropertyCoefficient &f)
 {
   f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityAbs(), coeff);
+  periodic_op.AddRealMassCoefficients(coeff, f);
 }
 
 void SpaceOperator::AddExtraSystemBdrCoefficients(double omega,
@@ -1037,16 +1218,24 @@ SpaceOperator::GetExtraSystemMatrix(double, Operator::DiagonalPolicy);
 template std::unique_ptr<Operator>
 SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, const Operator *,
                                                  const Operator *, const Operator *,
-                                                 const Operator *);
+                                                 const Operator *, double, double,
+                                                 const Operator *, const Operator *);
 template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetSystemMatrix<ComplexOperator, std::complex<double>>(
     std::complex<double>, std::complex<double>, std::complex<double>,
     const ComplexOperator *, const ComplexOperator *, const ComplexOperator *,
-    const ComplexOperator *);
+    const ComplexOperator *, std::complex<double>, std::complex<double>,
+    const ComplexOperator *, const ComplexOperator *);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double);
+SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double, double, double);
 template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double);
+SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double, double, double);
+
+template std::unique_ptr<Operator>
+SpaceOperator::GetPeriodicWeakCurlMatrix();
+
+template std::unique_ptr<Operator>
+SpaceOperator::GetPeriodicCurlMatrix();
 
 }  // namespace palace
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 0c9873ec6..4f59884cf 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -12,6 +12,7 @@
 #include "linalg/operator.hpp"
 #include "linalg/vector.hpp"
 #include "models/farfieldboundaryoperator.hpp"
+#include "models/periodicboundaryoperator.hpp"
 #include "models/lumpedportoperator.hpp"
 #include "models/materialoperator.hpp"
 #include "models/surfaceconductivityoperator.hpp"
@@ -54,6 +55,7 @@ class SpaceOperator
 
   // Operators for boundary conditions and source excitations.
   FarfieldBoundaryOperator farfield_op;
+  PeriodicBoundaryOperator periodic_op;
   SurfaceConductivityOperator surf_sigma_op;
   SurfaceImpedanceOperator surf_z_op;
   LumpedPortOperator lumped_port_op;
@@ -136,7 +138,7 @@ class SpaceOperator
   auto GlobalTrueVSize() const { return GetNDSpace().GlobalTrueVSize(); }
 
   // Construct any part of the frequency-dependent complex linear system matrix:
-  //                     A = K + iω C - ω² (Mr + i Mi) + A2(ω) .
+  //                     A = K + iω C - ω² (Mr + i Mi) + A2(ω) + i P1 - i P2.
   // For time domain problems, any one of K, C, or M = Mr can be constructed. The argument
   // ω is required only for the constructing the "extra" matrix A2(ω).
   template <typename OperType>
@@ -148,16 +150,22 @@ class SpaceOperator
   template <typename OperType>
   std::unique_ptr<OperType> GetExtraSystemMatrix(double omega,
                                                  Operator::DiagonalPolicy diag_policy);
+  template <typename OperType>
+  std::unique_ptr<OperType> GetPeriodicWeakCurlMatrix();
+  template <typename OperType>
+  std::unique_ptr<OperType> GetPeriodicCurlMatrix();
 
   // Construct the complete frequency or time domain system matrix using the provided
   // stiffness, damping, mass, and extra matrices:
-  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 .
+  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 + a4 P1 + a5 P2.
   // It is assumed that the inputs have been constructed using previous calls to
   // GetSystemMatrix() and the returned operator does not inherit ownership of any of them.
   template <typename OperType, typename ScalarType>
   std::unique_ptr<OperType>
   GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, const OperType *K,
-                  const OperType *C, const OperType *M, const OperType *A2 = nullptr);
+                  const OperType *C, const OperType *M, const OperType *A2 = nullptr,
+                  ScalarType a4 = 0, ScalarType a5 = 0,
+                  const OperType *P1 = nullptr, const OperType *P2 = nullptr);
 
   // Construct the real, SPD matrix for weighted L2 or H(curl) inner products:
   //                           B = a0 Kr + a2 Mr .
@@ -172,10 +180,10 @@ class SpaceOperator
   // Construct the matrix for frequency or time domain linear system preconditioning. If it
   // is real-valued (Mr > 0, Mi < 0, |Mr + Mi| is done on the material property coefficient,
   // not the matrix entries themselves):
-  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) .
+  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) + a4 P1 + a5 P2.
   template <typename OperType>
   std::unique_ptr<OperType> GetPreconditionerMatrix(double a0, double a1, double a2,
-                                                    double a3);
+                                                    double a3, double a4=0, double a5=0);
 
   // Construct and return the discrete curl or gradient matrices.
   const Operator &GetGradMatrix() const
diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp
index b92a8d19c..179e7b881 100644
--- a/palace/utils/configfile.cpp
+++ b/palace/utils/configfile.cpp
@@ -1052,19 +1052,20 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
     MFEM_VERIFY(it->find("ReceiverAttributes") != it->end(),
                 "Missing \"ReceiverAttributes\" list for \"Periodic\" boundary in the "
                 "configuration file!");
-    MFEM_VERIFY(it->find("Translation") != it->end(),
-                "Missing \"Translation\" vector for \"Periodic\" boundary in the "
-                "configuration file!");
     PeriodicData &data = vecdata.emplace_back();
     data.donor_attributes = it->at("DonorAttributes").get<std::vector<int>>();  // Required
     data.receiver_attributes =
         it->at("ReceiverAttributes").get<std::vector<int>>();               // Required
-    data.translation = it->at("Translation").get<std::array<double, 3>>();  // Required
+    data.translation = it->at("Translation").get<std::array<double, 3>>();
+    data.affine_transform = it->at("AffineTransformation").get<std::array<double, 16>>();
+    data.wave_vector = it->at("WaveVector").get<std::array<double, 3>>();
 
     // Cleanup
     it->erase("DonorAttributes");
     it->erase("ReceiverAttributes");
     it->erase("Translation");
+    it->erase("AffineTransformation");
+    it->erase("WaveVector");
     MFEM_VERIFY(it->empty(),
                 "Found an unsupported configuration file keyword under \"Periodic\"!\n"
                     << it->dump(2));
@@ -1075,6 +1076,8 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
       std::cout << "DonorAttributes: " << data.donor_attributes << '\n';
       std::cout << "ReceiverAttributes: " << data.receiver_attributes << '\n';
       std::cout << "Translation: " << data.translation << '\n';
+      std::cout << "AffineTransformation: " << data.affine_transform << '\n';
+      std::cout << "WaveVector: " << data.wave_vector << '\n';
     }
   }
 }
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index a9bafe6be..27f23dc71 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -472,10 +472,14 @@ struct PeriodicData
 public:
   // Vector defining the direction and distance for this periodic boundary condition.
   std::array<double, 3> translation = {0.0, 0.0, 0.0};
+  // Vector defining the affine transformation matrix for this periodic boundary condition.
+  std::array<double, 16> affine_transform = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
   // List of boundary donor attributes for this periodic boundary condition.
   std::vector<int> donor_attributes = {};
   // List of boundary receiver attributes for this periodic boundary condition.
   std::vector<int> receiver_attributes = {};
+  // Bloch wavevector specifying the phase delay in the X/Y/Z directions.
+  std::array<double, 3>  wave_vector = {0.0, 0.0, 0.0};
 };
 
 struct PeriodicBoundaryData : public internal::DataVector<PeriodicData>
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index f81dd9303..ecdd191ee 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -2213,7 +2213,12 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
                                                  transformation,
                                                  norm_tol);
 
-
+      // Should move this up. If translation or affine transform is provided in config
+      // file, we use those.
+      // If only translation is provided -> use it
+      // If only affine transfomr is provided -> use it
+      // If both affine transform and translation are provided -> error or warning?
+      // If neither -> automatic detection
       //mfem::Vector translation(data.translation.size());
       //std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
       //auto periodic_mapping =

From 6420322ca61369519d5e77340c3df51bc3e3922f Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 31 Oct 2024 00:15:52 +0000
Subject: [PATCH 03/49] Improve Bloch wave vector specification and fix some
 bugs

---
 palace/models/periodicboundaryoperator.cpp |  58 +++++----
 palace/models/periodicboundaryoperator.hpp |   5 +-
 palace/models/spaceoperator.cpp            | 138 ++++++++++-----------
 palace/utils/configfile.cpp                |  43 ++++++-
 palace/utils/configfile.hpp                |  12 +-
 palace/utils/geodata.cpp                   |  55 ++------
 6 files changed, 160 insertions(+), 151 deletions(-)

diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index 0fbf0d9db..216ff2cac 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -26,6 +26,31 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
     std::sort(periodic_attr.begin(), periodic_attr.end());
     utils::PrettyPrint(periodic_attr);
   }
+  const auto &data = iodata.boundaries.floquet;
+  MFEM_VERIFY(data.wave_vector.size() == mesh.SpaceDimension(),
+              "Bloch wave vector size must equal the spatial dimension.");
+  wave_vector.SetSize(data.wave_vector.size());
+  std::copy(data.wave_vector.begin(), data.wave_vector.end(), wave_vector.GetData());
+  non_zero_wave_vector = (wave_vector.Norml2() > std::numeric_limits<double>::epsilon());
+  MFEM_VERIFY(!non_zero_wave_vector ||
+              iodata.problem.type == config::ProblemData::Type::DRIVEN ||
+              iodata.problem.type == config::ProblemData::Type::EIGENMODE,
+              "Quasi-periodic Floquet boundary conditions are only available for "
+              " frequency domain driven or eigenmode simulations!");
+
+  // Matrix representation of the cross product with the wave vector:
+  // [k x] = | 0  -k3  k2|
+  //         | k3  0  -k1|
+  //         |-k2  k1  0 |
+  wave_vector_cross.SetSize(3); // assumes 3D?
+  wave_vector_cross(0,1) = -wave_vector[2];
+  wave_vector_cross(0,2) = wave_vector[1];
+  wave_vector_cross(1,0) = wave_vector[2];
+  wave_vector_cross(1,2) = -wave_vector[0];
+  wave_vector_cross(2,0) = -wave_vector[1];
+  wave_vector_cross(2,1) = wave_vector[0];
+  //Mpi::Print("Wave vector cross product\n");
+  //wave_vector_cross.Print();
 }
 
 mfem::Array<int>
@@ -73,7 +98,6 @@ PeriodicBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
   }
 
   // Mark selected boundary attributes from the mesh as periodic.
-  // ???? IS THIS USEFUL???
   mfem::Array<int> periodic_bcs;
   for (const auto &data : iodata.boundaries.periodic)
   {
@@ -94,32 +118,6 @@ PeriodicBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
       }
       periodic_bcs.Append(attr);
     }
-
-    // Wave vector ???? SHOULD BE ONLY ONE WAVE VECTOR FOR THE ENTIRE SIM
-    // NOT ONE PER PERIODIC BC PAIR??? MOVE THIS OUTSIDE THE LOOP?
-    MFEM_VERIFY(data.wave_vector.size() == mesh.SpaceDimension(),
-    "Block wave vector size must equal the spatial dimension.");
-    wave_vector.SetSize(data.wave_vector.size());
-    std::copy(data.wave_vector.begin(), data.wave_vector.end(), wave_vector.GetData());
-    MFEM_VERIFY(periodic_bcs.Size() == 0 ||
-                wave_vector.Normlinf() < std::numeric_limits<double>::epsilon() ||
-                iodata.problem.type == config::ProblemData::Type::DRIVEN ||
-                iodata.problem.type == config::ProblemData::Type::EIGENMODE,
-                "Quasi-periodic Floquet boundary conditions are only available for "
-                " frequency domain driven or eigenmode simulations!");
-
-
-    // Matrix representation of cross product with wave vector
-    // [k x] = | 0  -k3  k2|
-    //         | k3  0  -k1|
-    //         |-k2  k1  0 |
-    wave_vector_cross.SetSize(3); // assumes 3D?
-    wave_vector_cross(0,1) = -wave_vector[2];
-    wave_vector_cross(0,2) = wave_vector[1];
-    wave_vector_cross(1,0) = wave_vector[2];
-    wave_vector_cross(1,2) = -wave_vector[0];
-    wave_vector_cross(2,0) = -wave_vector[1];
-    wave_vector_cross(2,1) = wave_vector[0];
   }
 
   return periodic_bcs;
@@ -129,7 +127,7 @@ void PeriodicBoundaryOperator::AddRealMassCoefficients(double coeff,
                                                        MaterialPropertyCoefficient &f)
 {
 
-  if (periodic_attr.Size())
+  if (non_zero_wave_vector)
   {
     // [k x]^T 1/mu [k x]
     mfem::DenseTensor kx(mat_op.GetInvPermeability().SizeI(),
@@ -154,7 +152,7 @@ void PeriodicBoundaryOperator::AddWeakCurlCoefficients(double coeff,
                                                        MaterialPropertyCoefficient &f)
 {
 
-  if (periodic_attr.Size())
+  if (non_zero_wave_vector)
   {
     // 1/mu [k x]
     mfem::DenseTensor kx(mat_op.GetInvPermeability().SizeI(),
@@ -176,7 +174,7 @@ void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
                                                    MaterialPropertyCoefficient &f)
 {
 
-  if (periodic_attr.Size())
+  if (non_zero_wave_vector)
   {
     // [k x]^T 1/mu
     mfem::DenseTensor kxT(mat_op.GetInvPermeability().SizeI(),
diff --git a/palace/models/periodicboundaryoperator.hpp b/palace/models/periodicboundaryoperator.hpp
index 2da6fa332..6205ec284 100644
--- a/palace/models/periodicboundaryoperator.hpp
+++ b/palace/models/periodicboundaryoperator.hpp
@@ -28,9 +28,12 @@ class PeriodicBoundaryOperator
   // Bloch wave vector for Floquet boundary conditions.
   mfem::Vector wave_vector;
 
-  // Matrix representation of cross product with the wave_vector;
+  // Matrix representation of cross product with the wave vector.
   mfem::DenseMatrix wave_vector_cross;
 
+  // True if the wave vector is nonzero; if false, the additional terms are bypassed.
+  bool non_zero_wave_vector;
+
   mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
 
 public:
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 8b8acafac..26360bc69 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -207,7 +207,9 @@ void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace,
 void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
                     const MaterialPropertyCoefficient *f,
                     const MaterialPropertyCoefficient *dfb,
-                    const MaterialPropertyCoefficient *fb, bool assemble_q_data = false)
+                    const MaterialPropertyCoefficient *fb,
+                    const MaterialPropertyCoefficient *fpw,
+                    const MaterialPropertyCoefficient *fp, bool assemble_q_data = false)
 {
   if (df && !df->empty() && f && !f->empty())
   {
@@ -239,6 +241,14 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
       a.AddBoundaryIntegrator<VectorFEMassIntegrator>(*fb);
     }
   }
+  if (fpw && !fpw->empty())
+  {
+    a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(*fpw);
+  }
+  if (fp && !fp->empty())
+  {
+    a.AddDomainIntegrator<MixedVectorCurlIntegrator>(*fp);
+  }
   if (assemble_q_data)
   {
     a.AssembleQuadratureData();
@@ -262,32 +272,17 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   }
 }
 
-void AddMixedIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
-                         const MaterialPropertyCoefficient *fw, bool assemble_q_data = false)
-{
-  if (f && !f->empty())
-  {
-    a.AddDomainIntegrator<MixedVectorCurlIntegrator>(*f);
-  }
-  if (fw && !fw->empty())
-  {
-    a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(*fw);
-  }
-  if (assemble_q_data)
-  {
-    a.AssembleQuadratureData();
-  }
-}
-
 auto AssembleOperator(const FiniteElementSpace &fespace,
                       const MaterialPropertyCoefficient *df,
                       const MaterialPropertyCoefficient *f,
                       const MaterialPropertyCoefficient *dfb,
-                      const MaterialPropertyCoefficient *fb, bool skip_zeros = false,
+                      const MaterialPropertyCoefficient *fb,
+                      const MaterialPropertyCoefficient *fpw,
+                      const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
                       bool assemble_q_data = false)
 {
   BilinearForm a(fespace);
-  AddIntegrators(a, df, f, dfb, fb, assemble_q_data);
+  AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
   return a.Assemble(skip_zeros);
 }
 
@@ -295,11 +290,13 @@ auto AssembleOperators(const FiniteElementSpaceHierarchy &fespaces,
                        const MaterialPropertyCoefficient *df,
                        const MaterialPropertyCoefficient *f,
                        const MaterialPropertyCoefficient *dfb,
-                       const MaterialPropertyCoefficient *fb, bool skip_zeros = false,
+                       const MaterialPropertyCoefficient *fb,
+                       const MaterialPropertyCoefficient *fpw,
+                       const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
                        bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
-  AddIntegrators(a, df, f, dfb, fb, assemble_q_data);
+  AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
@@ -313,18 +310,6 @@ auto AssembleAuxOperators(const FiniteElementSpaceHierarchy &fespaces,
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
-/*
-// Add fp, fpw logic to AssembleOperators and AddIntegrators instead
-auto AssembleMixedOperators(const FiniteElementSpaceHierarchy &fespaces,
-                            const MaterialPropertyCoefficient *fp,
-                            const MaterialPropertyCoefficient *fpw, bool skip_zeros = false,
-                            bool assemble_q_data = false, std::size_t l0 = 0)
-{
-  BilinearForm a(fespaces.GetFinestFESpace());
-  AddMixedIntegrators(a, fp, fpw, assemble_q_data);
-  return a.Assemble(fespaces, skip_zeros, l0); //can't use assemble when trial and test spaces differ
-}
-*/
 }  // namespace
 
 template <typename OperType>
@@ -343,7 +328,7 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, skip_zeros);
+  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto K = std::make_unique<ComplexParOperator>(std::move(k), nullptr, GetNDSpace());
@@ -374,7 +359,7 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, skip_zeros);
+  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr, nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto C = std::make_unique<ComplexParOperator>(std::move(c), nullptr, GetNDSpace());
@@ -411,11 +396,11 @@ std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy
   std::unique_ptr<Operator> mr, mi;
   if (!empty[0])
   {
-    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, skip_zeros);
+    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr, nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, skip_zeros);
+    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr, nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -451,11 +436,11 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   std::unique_ptr<Operator> ar, ai;
   if (!empty[0])
   {
-    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, skip_zeros);
+    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr, nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, skip_zeros);
+    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr, nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -479,23 +464,25 @@ std::unique_ptr<OperType>
 SpaceOperator::GetPeriodicWeakCurlMatrix()
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
-  periodic_op.AddWeakCurlCoefficients(1.0, f);
-  int empty = (f.empty());
+  MaterialPropertyCoefficient fpw(mat_op.MaxCeedAttribute());
+  periodic_op.AddWeakCurlCoefficients(1.0, fpw);
+  int empty = (fpw.empty());
   Mpi::GlobalMin(1, &empty, GetComm());
   if (empty)
   {
     return {};
   }
-  constexpr bool skip_zeros = false, assemble_q_data = false;
+  constexpr bool skip_zeros = false;
+  //constexpr bool assemble_q_data = false;
   //BilinearForm a(GetNDSpace(), GetNDSpace()); //? which spaces and what order
-  BilinearForm a(GetNDSpace());//test
-  a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(f);
-  if (assemble_q_data)
-  {
-    a.AssembleQuadratureData();
-  }
-  auto weakCurl = a.Assemble(skip_zeros);
+  //BilinearForm a(GetNDSpace());//test
+  //a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(f);
+  //if (assemble_q_data)
+  //{
+  //  a.AssembleQuadratureData();
+  //}
+  //auto weakCurl = a.Assemble(skip_zeros);
+  auto weakCurl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpw, nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto WeakCurl = std::make_unique<ComplexParOperator>(std::move(weakCurl),nullptr, GetNDSpace(), GetNDSpace(),false);
@@ -513,23 +500,25 @@ std::unique_ptr<OperType>
 SpaceOperator::GetPeriodicCurlMatrix()
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
-  periodic_op.AddCurlCoefficients(1.0, f);
-  int empty = (f.empty());
+  MaterialPropertyCoefficient fw(mat_op.MaxCeedAttribute());
+  periodic_op.AddCurlCoefficients(1.0, fw);
+  int empty = (fw.empty());
   Mpi::GlobalMin(1, &empty, GetComm());
   if (empty)
   {
     return {};
   }
-  constexpr bool skip_zeros = false, assemble_q_data = false;
+  constexpr bool skip_zeros = false;
+  //constexpr bool assemble_q_data = false;
   //BilinearForm a(GetNDSpace(), GetNDSpace()); //? which spaces and what order?
-  BilinearForm a(GetNDSpace());//test
-  a.AddDomainIntegrator<MixedVectorCurlIntegrator>(f);
-  if (assemble_q_data)
-  {
-    a.AssembleQuadratureData();
-  }
-  auto curl = a.Assemble(skip_zeros);
+  //BilinearForm a(GetNDSpace());//test
+  //a.AddDomainIntegrator<MixedVectorCurlIntegrator>(f);
+  //if (assemble_q_data)
+  //{
+  //  a.AssembleQuadratureData();
+  //}
+  //auto curl = a.Assemble(skip_zeros);
+  auto curl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fw, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto Curl = std::make_unique<ComplexParOperator>(std::move(curl),nullptr, GetNDSpace(), GetNDSpace(),false);
@@ -738,6 +727,7 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
                                const OperType *A2, ScalarType a4, ScalarType a5,
                                const OperType *P1, const OperType *P2)
 {
+  Mpi::Print("In GetSystemMatrix\n");
   using ParOperType =
       typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
                                 ComplexParOperator, ParOperator>::type;
@@ -868,6 +858,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
                                                                  double a2, double a3,
                                                                  double a4, double a5)
 {
+  Mpi::Print("In GetPreconditionerMatrix\n");
   // XX TODO: Handle complex coeff a0/a1/a2/a3 (like GetSystemMatrix)
 
   // When partially assembled, the coarse operators can reuse the fine operator quadrature
@@ -891,7 +882,9 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         dfi(mat_op.MaxCeedAttribute()), fr(mat_op.MaxCeedAttribute()),
         fi(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
         dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()),
-        fbi(mat_op.MaxCeedBdrAttribute());
+        fbi(mat_op.MaxCeedBdrAttribute()), fpi(mat_op.MaxCeedAttribute()),
+        fpwi(mat_op.MaxCeedAttribute()), fpr(mat_op.MaxCeedAttribute()),
+        fpwr(mat_op.MaxCeedAttribute());
     AddStiffnessCoefficients(a0, dfr, fr);
     AddStiffnessBdrCoefficients(a0, fbr);
     AddDampingCoefficients(a1, fi);
@@ -900,21 +893,23 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddImagMassCoefficients(a2, fi);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
-    //periodic_op.AddWeakCurlCoefficients(a4, fwi);
-    //periodic_op.AddCurlCoefficients(a5, fi);
-    int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()),
-                    (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty())};
+    periodic_op.AddWeakCurlCoefficients(a4, fpwi);
+    periodic_op.AddCurlCoefficients(a5, fpi);
+    int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()
+                     && fpwr.empty() && fpr.empty()),
+                    (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty()
+                     && fpwi.empty() && fpi.empty())};
     Mpi::GlobalMin(2, empty, GetComm());
     if (!empty[0])
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, skip_zeros,
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr, skip_zeros,
                                  assemble_q_data);
       br_aux_vec =
           AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
     }
     if (!empty[1])
     {
-      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, skip_zeros,
+      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fpwi, &fpi, skip_zeros,
                                  assemble_q_data);
       bi_aux_vec =
           AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, skip_zeros, assemble_q_data);
@@ -924,7 +919,8 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
   {
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
         fr(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
-        fbr(mat_op.MaxCeedBdrAttribute());
+        fbr(mat_op.MaxCeedBdrAttribute()), fpwr(mat_op.MaxCeedAttribute()),
+        fpr(mat_op.MaxCeedAttribute());
     AddStiffnessCoefficients(a0, dfr, fr);
     AddStiffnessBdrCoefficients(a0, fbr);
     AddDampingCoefficients(a1, fr);
@@ -936,7 +932,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     Mpi::GlobalMin(1, &empty, GetComm());
     if (!empty)
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, skip_zeros,
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr, skip_zeros,
                                  assemble_q_data);
       br_aux_vec =
           AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp
index 179e7b881..4d6edebd1 100644
--- a/palace/utils/configfile.cpp
+++ b/palace/utils/configfile.cpp
@@ -1035,6 +1035,28 @@ void LumpedPortBoundaryData::SetUp(json &boundaries)
   }
 }
 
+void FloquetData::SetUp(json &boundaries)  // Parse the optional "FloquetWaveVector" key.
+{
+  auto floquet = boundaries.find("FloquetWaveVector");
+  if (floquet == boundaries.end())
+  {
+    return;  // Key absent: wave_vector is left unmodified.
+  }
+  else
+  {
+    MFEM_VERIFY(floquet->is_array(),
+                "\"FloquetWaveVector\" should specify an array in the configuration file!");
+    wave_vector = floquet->get<std::array<double, 3>>();  // Converted to 3 doubles.
+
+    // Debug: echo the parsed wave vector when JSON_DEBUG is enabled.
+    if constexpr (JSON_DEBUG)
+    {
+      std::cout << "FloquetWaveVector: " << wave_vector << '\n';
+    }
+  }
+
+}
+
 void PeriodicBoundaryData::SetUp(json &boundaries)
 {
   auto periodic = boundaries.find("Periodic");
@@ -1056,16 +1078,26 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
     data.donor_attributes = it->at("DonorAttributes").get<std::vector<int>>();  // Required
     data.receiver_attributes =
         it->at("ReceiverAttributes").get<std::vector<int>>();               // Required
-    data.translation = it->at("Translation").get<std::array<double, 3>>();
-    data.affine_transform = it->at("AffineTransformation").get<std::array<double, 16>>();
-    data.wave_vector = it->at("WaveVector").get<std::array<double, 3>>();
+    auto trslt = it->find("Translation");
+    if (trslt!= it->end())
+    {
+      MFEM_VERIFY(trslt->is_array(),
+                  "\"Translation\" should specify an array in the configuration file!");
+      data.translation = trslt->get<std::array<double, 3>>();
+    }
+    auto trsfr = it->find("AffineTransformation");
+    if (trsfr!= it->end())
+    {
+      MFEM_VERIFY(trsfr->is_array(),
+                  "\"AffineTransformation\" should specify an array in the configuration file!");
+      data.affine_transform = trsfr->get<std::array<double, 16>>();
+    }
 
     // Cleanup
     it->erase("DonorAttributes");
     it->erase("ReceiverAttributes");
     it->erase("Translation");
     it->erase("AffineTransformation");
-    it->erase("WaveVector");
     MFEM_VERIFY(it->empty(),
                 "Found an unsupported configuration file keyword under \"Periodic\"!\n"
                     << it->dump(2));
@@ -1077,7 +1109,6 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
       std::cout << "ReceiverAttributes: " << data.receiver_attributes << '\n';
       std::cout << "Translation: " << data.translation << '\n';
       std::cout << "AffineTransformation: " << data.affine_transform << '\n';
-      std::cout << "WaveVector: " << data.wave_vector << '\n';
     }
   }
 }
@@ -1395,6 +1426,7 @@ void BoundaryData::SetUp(json &config)
   impedance.SetUp(*boundaries);
   lumpedport.SetUp(*boundaries);
   periodic.SetUp(*boundaries);
+  floquet.SetUp(*boundaries);
   waveport.SetUp(*boundaries);
   current.SetUp(*boundaries);
   postpro.SetUp(*boundaries);
@@ -1444,6 +1476,7 @@ void BoundaryData::SetUp(json &config)
   boundaries->erase("Impedance");
   boundaries->erase("LumpedPort");
   boundaries->erase("Periodic");
+  boundaries->erase("FloquetWaveVector");
   boundaries->erase("WavePort");
   boundaries->erase("SurfaceCurrent");
   boundaries->erase("Ground");
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index 27f23dc71..f3dcdb670 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -478,8 +478,6 @@ struct PeriodicData
   std::vector<int> donor_attributes = {};
   // List of boundary receiver attributes for this periodic boundary condition.
   std::vector<int> receiver_attributes = {};
-  // Bloch wavevector specifying the phase delay in the X/Y/Z directions.
-  std::array<double, 3>  wave_vector = {0.0, 0.0, 0.0};
 };
 
 struct PeriodicBoundaryData : public internal::DataVector<PeriodicData>
@@ -488,6 +486,15 @@ struct PeriodicBoundaryData : public internal::DataVector<PeriodicData>
   void SetUp(json &boundaries);
 };
 
+struct FloquetData
+{
+  public:
+    // Bloch wavevector specifying the phase delay in the X/Y/Z directions.
+    std::array<double, 3> wave_vector = {0.0, 0.0, 0.0};
+
+    void SetUp(json &boundaries);  // Fills wave_vector from "FloquetWaveVector" if present.
+};
+
 struct WavePortData
 {
 public:
@@ -643,6 +650,7 @@ struct BoundaryData
   WavePortBoundaryData waveport = {};
   SurfaceCurrentBoundaryData current = {};
   PeriodicBoundaryData periodic = {};
+  FloquetData floquet;//?
   BoundaryPostData postpro = {};
 
   void SetUp(json &config);
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index ecdd191ee..f97a7961c 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1884,37 +1884,6 @@ void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
                                  const std::vector<mfem::Vector> &receiver_pts,
                                  mfem::DenseMatrix &transformation)
 {
-  // SVD
-  // But this assumes known correspondence
-  /*
-  mfem::DenseMatrix Am(3, num_donor_pts);
-  mfem::DenseMatrix Bm(3, num_receiver_pts);
-  int idx = 0;
-  for (const int v : bdr_v_donor)
-  {
-    coord = periodic_mesh->GetVertex(v);
-    Am(0,idx) = coord[0] - donor_centroid[0];
-    Am(1,idx) = coord[1] - donor_centroid[1];
-    Am(2,idx) = coord[2] - donor_centroid[2];
-    idx++;
-  }
-  idx = 0;
-  for (const int v : bdr_v_receiver)
-  {
-    coord = periodic_mesh->GetVertex(v);
-    Bm(0,idx) = coord[0] - receiver_centroid[0];
-    Bm(1,idx) = coord[1] - receiver_centroid[1];
-    Bm(2,idx) = coord[2] - receiver_centroid[2];
-    idx++;
-  }
-  mfem::DenseMatrix H(3);
-  Bm.Transpose();
-  Mult(Am, Bm, H);
-  H.Print();
-  //mfem::DenseMatrixSVD svd(H,'A','A');
-  // Use eigen?
-  */
-
 
     mfem::DenseMatrix A(12);
     A = 0.0;
@@ -2037,7 +2006,7 @@ std::vector<int> CreatePeriodicVertexMapping(
     dx -= coord;
     //Mpi::Print(" to receiver point: {:d} ({:.3e}, {:.3e}, {:.3e}), with transform error {:.3e}\n", vj, receiver_coord[0], receiver_coord[1], receiver_coord[2], dx.Norml2());
 
-    MFEM_VERIFY(dx.Norml2() < tol, "Could not match points on periodic boundaries.");
+    MFEM_VERIFY(dx.Norml2() < tol, "Could not match points on periodic boundaries, transformed donor point does not correspond to a receive point.");
 
     MFEM_VERIFY(replica2primary.find(vj) == replica2primary.end(), "Could not match points on periodic boundaries, multiple donor points map to the same receiver point.")
 
@@ -2112,7 +2081,15 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
     auto periodic_mesh = std::move(mesh);
     for (const auto &data : boundaries.periodic)
     {
-      // Compute the translation vector between donor and receiver boundaries.
+
+      // If translation or affine transform is provided in config
+      // file, we use those.
+      // If only translation is provided -> use it
+      // If only affine transform is provided -> use it
+      // If both affine transform and translation are provided -> error or warning?
+      // If neither -> automatic detection
+
+      // Compute the transformation between donor and receiver boundaries.
       const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
       const int sdim = periodic_mesh->SpaceDimension();
       mfem::Vector coord(sdim), donor_centroid(sdim), receiver_centroid(sdim);
@@ -2196,9 +2173,9 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       else if (donor_pts.size() == 2)
       {
         // Use normals to compute a rotation matrix
-        ComputeRotation(donor_normal, receiver_normal,
-                        transformation);
-        // Use add centroids translation to transform matrix
+        ComputeRotation(donor_normal, receiver_normal, transformation);
+
+        // Add centroids translation to transform matrix
         transformation(0,3) = translation2[0];
         transformation(1,3) = translation2[1];
         transformation(2,3) = translation2[2];
@@ -2213,12 +2190,6 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
                                                  transformation,
                                                  norm_tol);
 
-      // Should move this up. If translation or affine transform is provided in config
-      // file, we use those.
-      // If only translation is provided -> use it
-      // If only affine transfomr is provided -> use it
-      // If both affine transform and translation are provided -> error or warning?
-      // If neither -> automatic detection
       //mfem::Vector translation(data.translation.size());
       //std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
       //auto periodic_mapping =

From 707b32b8c9a682aa0d726b8bb2b8ba14d49de2b3 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 31 Oct 2024 18:29:35 +0000
Subject: [PATCH 04/49] Initial implementation of floquet periodicity terms in
 governing equations

---
 palace/drivers/drivensolver.cpp |  14 +-
 palace/drivers/eigensolver.cpp  |   8 +-
 palace/linalg/arpack.cpp        | 127 ++++++++++++++-
 palace/linalg/arpack.hpp        |  16 +-
 palace/linalg/eps.hpp           |   6 +
 palace/linalg/slepc.cpp         | 265 +++++++++++++++++++++++++++++++-
 palace/linalg/slepc.hpp         |  21 ++-
 palace/models/romoperator.cpp   |  27 +++-
 palace/models/romoperator.hpp   |   4 +-
 palace/models/spaceoperator.cpp |  23 +--
 palace/models/spaceoperator.hpp |   3 +-
 palace/models/timeoperator.cpp  |   4 +-
 palace/utils/geodata.cpp        |   3 +
 13 files changed, 481 insertions(+), 40 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index c447b8c24..013c62acc 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -117,16 +117,18 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega0, Operator::DIAG_ZERO);
+  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>();
+  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>();
   const auto &Curl = space_op.GetCurlMatrix();
 
   // Set up the linear solver and set operators for the first frequency step. The
   // preconditioner for the complex linear system is constructed from a real approximation
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega0,
-                                    std::complex<double>(-omega0 * omega0, 0.0), K.get(),
-                                    C.get(), M.get(), A2.get());
+                                    std::complex<double>(-omega0 * omega0, 0.0), 1.0i, -1.0i, K.get(),
+                                    C.get(), M.get(), A2.get(), P1.get(), P2.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega0, -omega0 * omega0,
-                                                             omega0);
+                                                             omega0, 1.0, -1.0);
 
   ComplexKspSolver ksp(iodata, space_op.GetNDSpaces(), &space_op.GetH1Spaces());
   ksp.SetOperators(*A, *P);
@@ -163,10 +165,10 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
       // Update frequency-dependent excitation and operators.
       A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega, Operator::DIAG_ZERO);
       A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega,
-                                   std::complex<double>(-omega * omega, 0.0), K.get(),
-                                   C.get(), M.get(), A2.get());
+                                   std::complex<double>(-omega * omega, 0.0), 1.0i, -1.0i, K.get(),
+                                   C.get(), M.get(), A2.get(), P1.get(), P2.get());
       P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega,
-                                                            omega);
+                                                            omega, 1.0, -1.0);
       ksp.SetOperators(*A, *P);
     }
     space_op.GetExcitationVector(omega, RHS);
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 8f12cba7e..0dc04d09c 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -36,6 +36,8 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   auto K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>();
+  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>();
   const auto &Curl = space_op.GetCurlMatrix();
   SaveMetadata(space_op.GetNDSpaces());
 
@@ -241,10 +243,10 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // preconditioner for complex linear systems is constructed from a real approximation
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * target,
-                                    std::complex<double>(-target * target, 0.0), K.get(),
-                                    C.get(), M.get());
+                                    std::complex<double>(-target * target, 0.0), 1.0i, -1.0i, K.get(),
+                                    C.get(), M.get(), P1.get(), P2.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, target, -target * target,
-                                                             target);
+                                                             target, 1.0, -1.0);
   auto ksp = std::make_unique<ComplexKspSolver>(iodata, space_op.GetNDSpaces(),
                                                 &space_op.GetH1Spaces());
   ksp->SetOperators(*A, *P);
diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp
index 431ff5acc..16d69bf79 100644
--- a/palace/linalg/arpack.cpp
+++ b/palace/linalg/arpack.cpp
@@ -201,6 +201,25 @@ void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
   MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!");
 }
 
+void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
+                                          const ComplexOperator &M,
+                                          const ComplexOperator &P1,
+                                          const ComplexOperator &P2,
+                                          EigenvalueSolver::ScaleType type)
+{  // Base-class stub for the (K, M, P1, P2) overload; arguments are unused.
+  MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!");
+}
+
+void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
+                                          const ComplexOperator &C,
+                                          const ComplexOperator &M,
+                                          const ComplexOperator &P1,
+                                          const ComplexOperator &P2,
+                                          EigenvalueSolver::ScaleType type)
+{  // Base-class stub for the (K, C, M, P1, P2) overload; arguments are unused.
+  MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!");
+}
+
 void ArpackEigenvalueSolver::SetLinearSolver(const ComplexKspSolver &ksp)
 {
   opInv = &ksp;
@@ -490,7 +509,7 @@ void ArpackEigenvalueSolver::RescaleEigenvectors(int num_eig)
 ArpackEPSSolver::ArpackEPSSolver(MPI_Comm comm, int print)
   : ArpackEigenvalueSolver(comm, print)
 {
-  opK = opM = nullptr;
+  opK = opM = opP1 = opP2 = nullptr;
   normK = normM = 0.0;
 }
 
@@ -523,6 +542,38 @@ void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
   n = opK->Height();
 }
 
+void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                                   const ComplexOperator &P1, const ComplexOperator &P2,
+                                   EigenvalueSolver::ScaleType type)
+{  // Linear problem with extra operators P1, P2; only their addresses are stored.
+  MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
+  bool first = (opK == nullptr);  // No operator has been set on this solver yet.
+  opK = &K;
+  opM = &M;
+  opP1 = &P1;  // Added with coefficient +1i in ApplyOp and GetResidualNorm.
+  opP2 = &P2;  // Added with coefficient -1i in ApplyOp and GetResidualNorm.
+  if (first && type != ScaleType::NONE)  // Scaling uses the norms of K and M only.
+  {
+    normK = linalg::SpectralNorm(comm, *opK, opK->IsReal());
+    normM = linalg::SpectralNorm(comm, *opM, opM->IsReal());
+    MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!");
+    if (normK > 0 && normM > 0.0)  // Otherwise gamma and delta keep their prior values.
+    {
+      gamma = normK / normM;  // Store γ² for linear problem
+      delta = 2.0 / normK;
+    }
+  }
+
+  // Set up workspace: vectors sized to the height of K and flagged for device use.
+  x1.SetSize(opK->Height());
+  y1.SetSize(opK->Height());
+  z1.SetSize(opK->Height());
+  x1.UseDevice(true);
+  y1.UseDevice(true);
+  z1.UseDevice(true);
+  n = opK->Height();  // Problem size checked by the MFEM_VERIFY above on later calls.
+}
+
 int ArpackEPSSolver::Solve()
 {
   // Set some defaults (default maximum iterations from SLEPc).
@@ -584,6 +635,14 @@ void ArpackEPSSolver::ApplyOp(const std::complex<double> *px,
   if (!sinvert)
   {
     opK->Mult(x1, z1);
+    if (opP1)
+    {
+      opP1->AddMult(x1, z1, 1.0i);
+    }
+    if (opP2)
+    {
+      opP2->AddMult(x1, z1, -1.0i);
+    }
     opInv->Mult(z1, y1);
     y1 *= 1.0 / gamma;
   }
@@ -618,6 +677,14 @@ double ArpackEPSSolver::GetResidualNorm(std::complex<double> l, const ComplexVec
 {
   // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ.
   opK->Mult(x, r);
+  if (opP1)
+  {
+    opP1->AddMult(x, r, 1.0i);
+  }
+  if (opP2)
+  {
+    opP2->AddMult(x, r, -1.0i);
+  }
   opM->AddMult(x, r, -l);
   return linalg::Norml2(comm, r);
 }
@@ -642,7 +709,7 @@ double ArpackEPSSolver::GetBackwardScaling(std::complex<double> l) const
 ArpackPEPSolver::ArpackPEPSolver(MPI_Comm comm, int print)
   : ArpackEigenvalueSolver(comm, print)
 {
-  opK = opC = opM = nullptr;
+  opK = opC = opM = opP1 = opP2 = nullptr;
   normK = normC = normM = 0.0;
 }
 
@@ -683,6 +750,46 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
   n = opK->Height();
 }
 
+void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                                   const ComplexOperator &M, const ComplexOperator &P1,
+                                   const ComplexOperator &P2,
+                                   EigenvalueSolver::ScaleType type)
+{  // Quadratic problem with extra operators P1, P2; only their addresses are stored.
+  MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
+  bool first = (opK == nullptr);  // No operator has been set on this solver yet.
+  opK = &K;
+  opC = &C;
+  opM = &M;
+  opP1 = &P1;  // Added with coefficient +1i in ApplyOp and GetResidualNorm.
+  opP2 = &P2;  // Added with coefficient -1i in ApplyOp and GetResidualNorm.
+  if (first && type != ScaleType::NONE)  // Scaling uses the norms of K, C and M only.
+  {
+    normK = linalg::SpectralNorm(comm, *opK, opK->IsReal());
+    normC = linalg::SpectralNorm(comm, *opC, opC->IsReal());
+    normM = linalg::SpectralNorm(comm, *opM, opM->IsReal());
+    MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0,
+                "Invalid matrix norms for PEP scaling!");
+    if (normK > 0 && normC > 0.0 && normM > 0.0)  // Else gamma, delta are unchanged.
+    {
+      gamma = std::sqrt(normK / normM);
+      delta = 2.0 / (normK + gamma * normC);
+    }
+  }
+
+  // Set up workspace: vectors sized to the height of K and flagged for device use.
+  x1.SetSize(opK->Height());
+  x2.SetSize(opK->Height());
+  y1.SetSize(opK->Height());
+  y2.SetSize(opK->Height());
+  z1.SetSize(opK->Height());
+  x1.UseDevice(true);
+  x2.UseDevice(true);
+  y1.UseDevice(true);
+  y2.UseDevice(true);
+  z1.UseDevice(true);
+  n = opK->Height();  // Problem size checked by the MFEM_VERIFY above on later calls.
+}
+
 int ArpackPEPSolver::Solve()
 {
   // Set some defaults (from SLEPc ARPACK interface). The problem size is the size of the
@@ -767,6 +874,14 @@ void ArpackPEPSolver::ApplyOp(const std::complex<double> *px,
     }
 
     opK->Mult(x1, z1);
+    if (opP1)
+    {
+      opP1->AddMult(x1, z1, 1.0i);
+    }
+    if (opP2)
+    {
+      opP2->AddMult(x1, z1, -1.0i);
+    }
     opC->AddMult(x2, z1, std::complex<double>(gamma, 0.0));
     opInv->Mult(z1, y2);
     y2 *= -1.0 / (gamma * gamma);
@@ -825,6 +940,14 @@ double ArpackPEPSolver::GetResidualNorm(std::complex<double> l, const ComplexVec
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
+  if (opP1)
+  {
+    opP1->AddMult(x, r, 1.0i);
+  }
+  if (opP2)
+  {
+    opP2->AddMult(x, r, -1.0i);
+  }
   opC->AddMult(x, r, l);
   opM->AddMult(x, r, l * l);
   return linalg::Norml2(comm, r);
diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp
index 0f37cf944..e725f7bdb 100644
--- a/palace/linalg/arpack.hpp
+++ b/palace/linalg/arpack.hpp
@@ -120,6 +120,12 @@ class ArpackEigenvalueSolver : public EigenvalueSolver
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                    const ComplexOperator &M, const ComplexOperator &P1,
+                    const ComplexOperator &P2, ScaleType type) override;
 
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
@@ -181,7 +187,7 @@ class ArpackEPSSolver : public ArpackEigenvalueSolver
 {
 private:
   // References to matrices defining the generalized eigenvalue problem (not owned).
-  const ComplexOperator *opK, *opM;
+  const ComplexOperator *opK, *opM, *opP1, *opP2;
 
   // Operator norms for scaling.
   mutable double normK, normM;
@@ -203,6 +209,9 @@ class ArpackEPSSolver : public ArpackEigenvalueSolver
   using ArpackEigenvalueSolver::SetOperators;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
                     ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    ScaleType type) override;
 
   int Solve() override;
 };
@@ -213,7 +222,7 @@ class ArpackPEPSolver : public ArpackEigenvalueSolver
 private:
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM;
+  const ComplexOperator *opK, *opC, *opM, *opP1, *opP2;
 
   // Operator norms for scaling.
   mutable double normK, normC, normM;
@@ -238,6 +247,9 @@ class ArpackPEPSolver : public ArpackEigenvalueSolver
   using ArpackEigenvalueSolver::SetOperators;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                    const ComplexOperator &M, const ComplexOperator &P1,
+                    const ComplexOperator &P2, ScaleType type) override;
 
   int Solve() override;
 };
diff --git a/palace/linalg/eps.hpp b/palace/linalg/eps.hpp
index 835bac7cb..440f9a8b8 100644
--- a/palace/linalg/eps.hpp
+++ b/palace/linalg/eps.hpp
@@ -58,6 +58,12 @@ class EigenvalueSolver
                             ScaleType type) = 0;
   virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                             const ComplexOperator &M, ScaleType type) = 0;
+  virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                            const ComplexOperator &P1, const ComplexOperator &P2,
+                            ScaleType type) = 0;
+  virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                            const ComplexOperator &M, const ComplexOperator &P1,
+                            const ComplexOperator &P2, ScaleType type) = 0;
 
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index 3b8e4abdc..23aa9c0a8 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -351,6 +351,21 @@ void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const Complex
   MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
 }
 
+void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                                         const ComplexOperator &P1, const ComplexOperator &P2,
+                                         EigenvalueSolver::ScaleType type)
+{  // Base-class fallback for the (K, M, P1, P2) overload: unconditionally aborts.
+  MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
+}
+
+void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                                         const ComplexOperator &M, const ComplexOperator &P1,
+                                         const ComplexOperator &P2,
+                                         EigenvalueSolver::ScaleType type)
+{  // Base-class fallback for the (K, C, M, P1, P2) overload: unconditionally aborts.
+  MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
+}
+
 void SlepcEigenvalueSolver::SetLinearSolver(const ComplexKspSolver &ksp)
 {
   opInv = &ksp;
@@ -739,7 +754,7 @@ RG SlepcEPSSolverBase::GetRG() const
 SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print, const std::string &prefix)
   : SlepcEPSSolverBase(comm, print, prefix)
 {
-  opK = opM = nullptr;
+  opK = opM = opP1 = opP2 = nullptr;
   normK = normM = 0.0;
 }
 
@@ -798,6 +813,64 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
   }
 }
 
+void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                                  const ComplexOperator &P1, const ComplexOperator &P2,
+                                  EigenvalueSolver::ScaleType type)
+{
+  // Construct shell matrices for the scaled operators which define the generalized
+  // eigenvalue problem. P1 and P2 are stored as non-owning pointers next to K and M.
+  const bool first = (opK == nullptr);  // True only while no K has been stored yet
+  opK = &K;
+  opM = &M;
+  opP1 = &P1;
+  opP2 = &P2;
+
+  if (first)
+  {
+    const PetscInt n = opK->Height();
+    PalacePetscCall(
+        MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0));
+    PalacePetscCall(
+        MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1));
+    PalacePetscCall(
+        MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A0));
+    PalacePetscCall(
+        MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A1));
+    PalacePetscCall(MatShellSetVecType(A0, PetscVecType()));
+    PalacePetscCall(MatShellSetVecType(A1, PetscVecType()));
+    PalacePetscCall(EPSSetOperators(eps, A0, A1));
+  }
+
+  if (first && type != ScaleType::NONE)  // Scaling norms use K and M only, not P1/P2
+  {
+    normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
+    normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
+    MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!");
+    if (normK > 0 && normM > 0.0)
+    {
+      gamma = normK / normM;  // Store γ² for linear problem
+      delta = 2.0 / normK;
+    }
+  }
+
+  // Set up workspace: v0 is created once from A0, x1/y1 are sized to K's height.
+  if (!v0)
+  {
+    PalacePetscCall(MatCreateVecs(A0, nullptr, &v0));
+  }
+  x1.SetSize(opK->Height());
+  y1.SetSize(opK->Height());
+  x1.UseDevice(true);
+  y1.UseDevice(true);
+
+  // Configure linear solver for generalized problem or spectral transformation. This also
+  // allows use of the divergence-free projector as a linear solve side-effect.
+  if (first)
+  {
+    ConfigurePCShell(GetST(), (void *)this, __pc_apply_EPS);
+  }
+}
+
 void SlepcEPSSolver::SetBMat(const Operator &B)
 {
   SlepcEigenvalueSolver::SetBMat(B);
@@ -817,6 +890,14 @@ PetscReal SlepcEPSSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
 {
   // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ.
   opK->Mult(x, r);
+  if (opP1)
+  {
+    opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
+  }
+  if (opP2)
+  {
+    opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
+  }
   opM->AddMult(x, r, -l);
   return linalg::Norml2(GetComm(), r);
 }
@@ -840,7 +921,7 @@ SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print,
                                            const std::string &prefix)
   : SlepcEPSSolverBase(comm, print, prefix)
 {
-  opK = opC = opM = nullptr;
+  opK = opC = opM = opP1 = opP2 = nullptr;
   normK = normC = normM = 0.0;
 }
 
@@ -906,6 +987,71 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO
   }
 }
 
+void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                                        const ComplexOperator &M, const ComplexOperator &P1,
+                                        const ComplexOperator &P2,
+                                        EigenvalueSolver::ScaleType type)
+{
+  // Construct shell matrices for the scaled linearized operators which define the block 2x2
+  // eigenvalue problem. P1 and P2 are stored as non-owning pointers next to K, C, and M.
+  const bool first = (opK == nullptr);  // True only while no K has been stored yet
+  opK = &K;
+  opC = &C;
+  opM = &M;
+  opP1 = &P1;
+  opP2 = &P2;
+
+  if (first)
+  {
+    const PetscInt n = opK->Height();
+    PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE,
+                                   (void *)this, &A0));
+    PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE,
+                                   (void *)this, &A1));
+    PalacePetscCall(
+        MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L0));
+    PalacePetscCall(
+        MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L1));
+    PalacePetscCall(MatShellSetVecType(A0, PetscVecType()));
+    PalacePetscCall(MatShellSetVecType(A1, PetscVecType()));
+    PalacePetscCall(EPSSetOperators(eps, A0, A1));
+  }
+
+  if (first && type != ScaleType::NONE)  // Scaling norms use K, C, and M only, not P1/P2
+  {
+    normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
+    normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal());
+    normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
+    MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0,
+                "Invalid matrix norms for PEP scaling!");
+    if (normK > 0 && normC > 0.0 && normM > 0.0)
+    {
+      gamma = std::sqrt(normK / normM);
+      delta = 2.0 / (normK + gamma * normC);
+    }
+  }
+
+  // Set up workspace: v0 is created once from A0; x1/x2/y1/y2 are sized to K's height.
+  if (!v0)
+  {
+    PalacePetscCall(MatCreateVecs(A0, nullptr, &v0));
+  }
+  x1.SetSize(opK->Height());
+  x2.SetSize(opK->Height());
+  y1.SetSize(opK->Height());
+  y2.SetSize(opK->Height());
+  x1.UseDevice(true);
+  x2.UseDevice(true);
+  y1.UseDevice(true);
+  y2.UseDevice(true);
+
+  // Configure linear solver (only on the first call, when the shell matrices are created).
+  if (first)
+  {
+    ConfigurePCShell(GetST(), (void *)this, __pc_apply_PEPLinear);
+  }
+}
+
 void SlepcPEPLinearSolver::SetBMat(const Operator &B)
 {
   SlepcEigenvalueSolver::SetBMat(B);
@@ -956,6 +1102,14 @@ PetscReal SlepcPEPLinearSolver::GetResidualNorm(PetscScalar l, const ComplexVect
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
+  if (opP1)
+  {
+    opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
+  }
+  if (opP2)
+  {
+    opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
+  }
   opC->AddMult(x, r, l);
   opM->AddMult(x, r, l * l);
   return linalg::Norml2(GetComm(), r);
@@ -1217,7 +1371,7 @@ RG SlepcPEPSolverBase::GetRG() const
 SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print, const std::string &prefix)
   : SlepcPEPSolverBase(comm, print, prefix)
 {
-  opK = opC = opM = nullptr;
+  opK = opC = opM = opP1 = opP2 = nullptr;
   normK = normC = normM = 0.0;
 }
 
@@ -1283,6 +1437,71 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
   }
 }
 
+void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                                  const ComplexOperator &M, const ComplexOperator &P1,
+                                  const ComplexOperator &P2,
+                                  EigenvalueSolver::ScaleType type)
+{
+  // Construct shell matrices for the scaled operators which define the quadratic polynomial
+  // eigenvalue problem. P1 and P2 are stored as non-owning pointers next to K, C, and M.
+  const bool first = (opK == nullptr);  // True only while no K has been stored yet
+  opK = &K;
+  opC = &C;
+  opM = &M;
+  opP1 = &P1;
+  opP2 = &P2;
+
+  if (first)
+  {
+    const PetscInt n = opK->Height();
+    PalacePetscCall(
+        MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0));
+    PalacePetscCall(
+        MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1));
+    PalacePetscCall(
+        MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2));
+    PalacePetscCall(
+        MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A0));
+    PalacePetscCall(
+        MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A1));
+    PalacePetscCall(
+        MatShellSetOperation(A2, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A2));
+    PalacePetscCall(MatShellSetVecType(A0, PetscVecType()));
+    PalacePetscCall(MatShellSetVecType(A1, PetscVecType()));
+    PalacePetscCall(MatShellSetVecType(A2, PetscVecType()));
+    Mat A[3] = {A0, A1, A2};
+    PalacePetscCall(PEPSetOperators(pep, 3, A));
+  }
+
+  if (first && type != ScaleType::NONE)  // Scaling norms use K, C, and M only, not P1/P2
+  {
+    normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
+    normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal());
+    normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
+    MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0,
+                "Invalid matrix norms for PEP scaling!");
+    if (normK > 0 && normC > 0.0 && normM > 0.0)
+    {
+      gamma = std::sqrt(normK / normM);
+      delta = 2.0 / (normK + gamma * normC);
+    }
+  }
+
+  // Set up workspace: v0 is created once from A0, x1/y1 are sized to K's height.
+  if (!v0)
+  {
+    PalacePetscCall(MatCreateVecs(A0, nullptr, &v0));
+  }
+  x1.SetSize(opK->Height());
+  y1.SetSize(opK->Height());  // NOTE(review): no UseDevice(true) here, unlike SlepcEPSSolver — confirm
+
+  // Configure linear solver (only on the first call, when the shell matrices are created).
+  if (first)
+  {
+    ConfigurePCShell(GetST(), (void *)this, __pc_apply_PEP);
+  }
+}
+
 void SlepcPEPSolver::SetBMat(const Operator &B)
 {
   SlepcEigenvalueSolver::SetBMat(B);
@@ -1303,6 +1522,14 @@ PetscReal SlepcPEPSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
+  if (opP1)
+  {
+    opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
+  }
+  if (opP2)
+  {
+    opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
+  }
   opC->AddMult(x, r, l);
   opM->AddMult(x, r, l * l);
   return linalg::Norml2(GetComm(), r);
@@ -1339,6 +1566,14 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y)
 
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
+  if (ctx->opP1)
+  {
+    ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
+  }
+  if (ctx->opP2)
+  {
+    ctx->opP2->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, -1.0));
+  }
   ctx->y1 *= ctx->delta;
   PetscCall(ToPetscVec(ctx->y1, y));
 
@@ -1421,6 +1656,14 @@ PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y)
   ctx->opC->Mult(ctx->x2, ctx->y2);
   ctx->y2 *= ctx->gamma;
   ctx->opK->AddMult(ctx->x1, ctx->y2, std::complex<double>(1.0, 0.0));
+  if (ctx->opP1)
+  {
+    ctx->opP1->AddMult(ctx->x1, ctx->y2, std::complex<double>(0.0, 1.0));
+  }
+  if (ctx->opP2)
+  {
+    ctx->opP2->AddMult(ctx->x1, ctx->y2, std::complex<double>(0.0, -1.0));
+  }
   ctx->y2 *= -ctx->delta;
   PetscCall(ToPetscVec(ctx->y1, ctx->y2, y));
 
@@ -1501,6 +1744,14 @@ PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y)
   {
     ctx->y1.AXPBY(-ctx->sigma / (ctx->delta * ctx->gamma), ctx->x2, 0.0);  // Temporarily
     ctx->opK->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
+    if (ctx->opP1)
+    {
+      ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
+    }
+    if (ctx->opP2)
+    {
+      ctx->opP2->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, -1.0));
+    }
     ctx->opInv->Mult(ctx->y1, ctx->y2);
     if (ctx->opProj)
     {
@@ -1532,6 +1783,14 @@ PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y)
 
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
+  if (ctx->opP1)
+  {
+    ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
+  }
+  if (ctx->opP2)
+  {
+    ctx->opP2->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, -1.0));
+  }
   PetscCall(ToPetscVec(ctx->y1, y));
 
   PetscFunctionReturn(PETSC_SUCCESS);
diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp
index bd4fbc201..417d7fc7c 100644
--- a/palace/linalg/slepc.hpp
+++ b/palace/linalg/slepc.hpp
@@ -134,6 +134,12 @@ class SlepcEigenvalueSolver : public EigenvalueSolver
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                    const ComplexOperator &M, const ComplexOperator &P1,
+                    const ComplexOperator &P2, ScaleType type) override;
 
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
@@ -257,7 +263,7 @@ class SlepcEPSSolver : public SlepcEPSSolverBase
   using SlepcEigenvalueSolver::sinvert;
 
   // References to matrices defining the generalized eigenvalue problem (not owned).
-  const ComplexOperator *opK, *opM;
+  const ComplexOperator *opK, *opM, *opP1, *opP2;
 
 private:
   // Operator norms for scaling.
@@ -275,6 +281,9 @@ class SlepcEPSSolver : public SlepcEPSSolverBase
   using SlepcEigenvalueSolver::SetOperators;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
                     ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    ScaleType type) override;
 
   void SetBMat(const Operator &B) override;
 };
@@ -294,7 +303,7 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase
 
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM;
+  const ComplexOperator *opK, *opC, *opM, *opP1, *opP2;
 
   // Workspace vectors for operator applications.
   mutable ComplexVector x2, y2;
@@ -315,6 +324,9 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase
   using SlepcEigenvalueSolver::SetOperators;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                    const ComplexOperator &M, const ComplexOperator &P1,
+                    const ComplexOperator &P2, ScaleType type) override;
 
   void SetBMat(const Operator &B) override;
 
@@ -393,7 +405,7 @@ class SlepcPEPSolver : public SlepcPEPSolverBase
 
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM;
+  const ComplexOperator *opK, *opC, *opM, *opP1, *opP2;
 
 private:
   // Operator norms for scaling.
@@ -411,6 +423,9 @@ class SlepcPEPSolver : public SlepcPEPSolverBase
   using SlepcEigenvalueSolver::SetOperators;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                    const ComplexOperator &M, const ComplexOperator &P1,
+                    const ComplexOperator &P2, ScaleType type) override;
 
   void SetBMat(const Operator &B) override;
 };
diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp
index f87ac4b45..ea07e25c5 100644
--- a/palace/models/romoperator.cpp
+++ b/palace/models/romoperator.cpp
@@ -196,6 +196,8 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &space_op, int max_
   K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>();
+  P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>();
   MFEM_VERIFY(K && M, "Invalid empty HDM matrices when constructing PROM!");
 
   // Set up RHS vector (linear in frequency part) for the incident field at port boundaries,
@@ -246,10 +248,10 @@ void RomOperator::SolveHDM(double omega, ComplexVector &u)
   A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega, Operator::DIAG_ZERO);
   has_A2 = (A2 != nullptr);
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega,
-                                    std::complex<double>(-omega * omega, 0.0), K.get(),
-                                    C.get(), M.get(), A2.get());
+                                    std::complex<double>(-omega * omega, 0.0), 1.0i, -1.0i, K.get(),
+                                    C.get(), M.get(), A2.get(), P1.get(), P2.get());
   auto P =
-      space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega, omega);
+      space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega, omega, 1.0, -1.0);
   ksp->SetOperators(*A, *P);
 
   // The HDM excitation vector is computed as RHS = iω RHS1 + RHS2(ω).
@@ -314,6 +316,16 @@ void RomOperator::UpdatePROM(double omega, const ComplexVector &u)
   }
   Mr.conservativeResize(dim_V, dim_V);
   ProjectMatInternal(comm, V, *M, Mr, r, dim_V0);
+  if (P1)
+  {
+    P1r.conservativeResize(dim_V, dim_V);
+    ProjectMatInternal(comm, V, *P1, P1r, r, dim_V0);
+  }
+  if (P2)
+  {
+    P2r.conservativeResize(dim_V, dim_V);
+    ProjectMatInternal(comm, V, *P2, P2r, r, dim_V0);
+  }
   Ar.resize(dim_V, dim_V);
   if (RHS1.Size())
   {
@@ -370,7 +382,14 @@ void RomOperator::SolvePROM(double omega, ComplexVector &u)
     Ar += (1i * omega) * Cr;
   }
   Ar += (-omega * omega) * Mr;
-
+  if (P1)
+  {
+    Ar += 1i * P1r;
+  }
+  if (P2)
+  {
+    Ar -= 1i * P2r;
+  }
   if (has_RHS2)
   {
     space_op.GetExcitationVector2(omega, RHS2);
diff --git a/palace/models/romoperator.hpp b/palace/models/romoperator.hpp
index 2e9baba4d..6eecb2547 100644
--- a/palace/models/romoperator.hpp
+++ b/palace/models/romoperator.hpp
@@ -29,7 +29,7 @@ class RomOperator
   SpaceOperator &space_op;
 
   // HDM system matrices and excitation RHS.
-  std::unique_ptr<ComplexOperator> K, M, C, A2;
+  std::unique_ptr<ComplexOperator> K, M, C, A2, P1, P2;
   ComplexVector RHS1, RHS2, r;
   bool has_A2, has_RHS1, has_RHS2;
 
@@ -37,7 +37,7 @@ class RomOperator
   std::unique_ptr<ComplexKspSolver> ksp;
 
   // PROM matrices and vectors.
-  Eigen::MatrixXcd Kr, Mr, Cr, Ar;
+  Eigen::MatrixXcd Kr, Mr, Cr, Ar, P1r, P2r;
   Eigen::VectorXcd RHS1r, RHSr;
 
   // PROM reduced-order basis (real-valued) and active dimension.
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 26360bc69..ca2e2a974 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -723,11 +723,10 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
 template <typename OperType, typename ScalarType>
 std::unique_ptr<OperType>
 SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
+                               ScalarType a4, ScalarType a5,
                                const OperType *K, const OperType *C, const OperType *M,
-                               const OperType *A2, ScalarType a4, ScalarType a5,
-                               const OperType *P1, const OperType *P2)
+                               const OperType *A2, const OperType *P1, const OperType *P2)
 {
-  Mpi::Print("In GetSystemMatrix\n");
   using ParOperType =
       typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
                                 ComplexParOperator, ParOperator>::type;
@@ -858,7 +857,6 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
                                                                  double a2, double a3,
                                                                  double a4, double a5)
 {
-  Mpi::Print("In GetPreconditionerMatrix\n");
   // XX TODO: Handle complex coeff a0/a1/a2/a3 (like GetSystemMatrix)
 
   // When partially assembled, the coarse operators can reuse the fine operator quadrature
@@ -1212,16 +1210,15 @@ template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetExtraSystemMatrix(double, Operator::DiagonalPolicy);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, const Operator *,
+SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, double, double, const Operator *,
                                                  const Operator *, const Operator *,
-                                                 const Operator *, double, double,
-                                                 const Operator *, const Operator *);
+                                                 const Operator *, const Operator *, const Operator *);
 template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetSystemMatrix<ComplexOperator, std::complex<double>>(
     std::complex<double>, std::complex<double>, std::complex<double>,
+    std::complex<double>, std::complex<double>,
     const ComplexOperator *, const ComplexOperator *, const ComplexOperator *,
-    const ComplexOperator *, std::complex<double>, std::complex<double>,
-    const ComplexOperator *, const ComplexOperator *);
+    const ComplexOperator *, const ComplexOperator *, const ComplexOperator *);
 
 template std::unique_ptr<Operator>
 SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double, double, double);
@@ -1229,9 +1226,13 @@ template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double, double, double);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetPeriodicWeakCurlMatrix();
+SpaceOperator::GetPeriodicWeakCurlMatrix<Operator>();
+template std::unique_ptr<ComplexOperator>
+SpaceOperator::GetPeriodicWeakCurlMatrix<ComplexOperator>();
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetPeriodicCurlMatrix();
+SpaceOperator::GetPeriodicCurlMatrix<Operator>();
+template std::unique_ptr<ComplexOperator>
+SpaceOperator::GetPeriodicCurlMatrix<ComplexOperator>();
 
 }  // namespace palace
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 4f59884cf..6e84873d2 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -162,9 +162,8 @@ class SpaceOperator
   // GetSystemMatrix() and the returned operator does not inherit ownership of any of them.
   template <typename OperType, typename ScalarType>
   std::unique_ptr<OperType>
-  GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, const OperType *K,
+  GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, ScalarType a4, ScalarType a5, const OperType *K,
                   const OperType *C, const OperType *M, const OperType *A2 = nullptr,
-                  ScalarType a4 = 0, ScalarType a5 = 0,
                   const OperType *P1 = nullptr, const OperType *P2 = nullptr);
 
   // Construct the real, SPD matrix for weighted L2 or H(curl) inner products:
diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp
index b5f90da2b..f825b051e 100644
--- a/palace/models/timeoperator.cpp
+++ b/palace/models/timeoperator.cpp
@@ -83,8 +83,8 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera
       {
         // Configure the system matrix and also the matrix (matrices) from which the
         // preconditioner will be constructed.
-        A = space_op.GetSystemMatrix(a0, a1, 1.0, K.get(), C.get(), M.get());
-        B = space_op.GetPreconditionerMatrix<Operator>(a0, a1, 1.0, 0.0);
+        A = space_op.GetSystemMatrix(a0, a1, 1.0, 0.0, 0.0, K.get(), C.get(), M.get());
+        B = space_op.GetPreconditionerMatrix<Operator>(a0, a1, 1.0, 0.0, 0.0, 0.0);
 
         // Configure the solver.
         if (!kspA)
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index f97a7961c..f10e89c8a 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -2149,6 +2149,9 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0], donor_normal[1], donor_normal[2]);
       Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0], receiver_normal[1], receiver_normal[2]);
 
+      // Should we add check somewhere that the boundary surfaces are planar??
+      // Maybe in the ComputeNormal function?
+
       std::vector<mfem::Vector> donor_pts, receiver_pts;
       FindUniquePoints(periodic_mesh, bdr_v_donor, donor_centroid, diameter, donor_pts, norm_tol);
       FindUniquePoints(periodic_mesh, bdr_v_receiver, receiver_centroid, diameter, receiver_pts, norm_tol);

From 82d1d61a1c1b7df32e76edcfb9bee398ff390104 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 1 Nov 2024 00:56:11 +0000
Subject: [PATCH 05/49] Update surface normal calculation and add planar
 boundary check

---
 palace/utils/geodata.cpp | 333 ++++++++++++++++++++++++++-------------
 1 file changed, 225 insertions(+), 108 deletions(-)

diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index f10e89c8a..f5f37714f 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1744,6 +1744,91 @@ void ComputeCentroid(std::unique_ptr<mfem::Mesh> &mesh,
   diameter = xDiff.Norml2(); // mesh diameter
 }
 
+// Return the average of the unit normals of the boundary elements in elem_set, oriented
+// toward the adjacent volume element if inside is true and away from it otherwise. Aborts
+// on an empty set, a degenerate element, or (with check_planar) a non-planar boundary.
+mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
+                            const std::unordered_set<int> &elem_set,
+                            bool inside, bool check_planar=true)
+{
+  MFEM_VERIFY(!elem_set.empty(), "Cannot compute the normal of an empty boundary element set!");
+  int sdim = mesh->SpaceDimension();
+  mfem::IsoparametricTransformation T;
+  mfem::Vector loc_normal(sdim), normal(sdim);
+  normal = 0.0;
+  int count = 0;
+
+  auto UpdateNormal = [&](int el, mfem::ElementTransformation &Tr)
+  {
+    // Compute the (unnormalized) normal at the reference center of the element
+    const mfem::IntegrationPoint &ip = mfem::Geometries.GetCenter(Tr.GetGeometryType());
+    Tr.SetIntPoint(&ip);
+    mfem::CalcOrtho(Tr.Jacobian(), loc_normal);
+
+    // Normalize it, rejecting zero-length normals which would otherwise yield NaNs
+    const double loc_norm = loc_normal.Norml2();
+    MFEM_VERIFY(loc_norm > 0.0, "Degenerate boundary element found when computing the normal!");
+    loc_normal /= loc_norm;
+
+    // Orient the normal by comparing the boundary element center, offset by +/- the normal,
+    // to the center of its adjacent element; flip it if it points to the wrong side
+    mfem::Array<int> vert_bdr, vert_adj;
+    mesh->GetBdrElementVertices(el, vert_bdr);
+    mfem::Vector bdr_elem_center(sdim), adj_elem_center(sdim);
+    mfem::Vector bdr_elem_offset_p(sdim), bdr_elem_offset_n(sdim);
+    bdr_elem_center = 0.0;
+    for (int j=0; j<vert_bdr.Size(); j++)
+    {
+      mfem::Vector coord(mesh->GetVertex(vert_bdr[j]), sdim);
+      bdr_elem_center += coord;
+    }
+    bdr_elem_center /= vert_bdr.Size();
+
+    int eladj, info;
+    mesh->GetBdrElementAdjacentElement(el, eladj, info);
+    mesh->GetElementVertices(eladj, vert_adj);
+    adj_elem_center = 0.0;
+    for (int j=0; j<vert_adj.Size(); j++)
+    {
+      mfem::Vector vx(mesh->GetVertex(vert_adj[j]), sdim);
+      adj_elem_center += vx;
+    }
+    adj_elem_center /= vert_adj.Size();
+
+    bdr_elem_offset_p = bdr_elem_center;
+    bdr_elem_offset_p += loc_normal;
+    bdr_elem_offset_n = bdr_elem_center;
+    bdr_elem_offset_n -= loc_normal;
+    const double dist_p = adj_elem_center.DistanceTo(bdr_elem_offset_p);
+    const double dist_n = adj_elem_center.DistanceTo(bdr_elem_offset_n);
+    if (inside ? (dist_n < dist_p) : (dist_p < dist_n))
+    {
+      loc_normal *= -1.0;
+    }
+
+    // Check if the boundary is planar by comparing the current elem's
+    // normal to the average normal (accumulated so far)
+    if (count > 0 && check_planar)
+    {
+      mfem::Vector diff(normal);
+      diff /= count;
+      diff -= loc_normal;
+      MFEM_VERIFY(diff.Norml2() < 1e-6, "Periodic boundary mapping is only supported for planar boundaries.");
+    }
+    normal += loc_normal;
+
+    count++;
+  };
+
+  for (const int elem : elem_set)
+  {
+    mesh->GetBdrElementTransformation(elem, &T);
+    UpdateNormal(elem, T);
+  }
+  normal /= count;
+  return normal;
+}
+
 void ComputeNormal(std::unique_ptr<mfem::Mesh> &periodic_mesh,
                    const int elem, mfem::Vector &normal,
                    bool inside, const double norm_tol = 1e-6)
@@ -1884,47 +1969,49 @@ void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
                                  const std::vector<mfem::Vector> &receiver_pts,
                                  mfem::DenseMatrix &transformation)
 {
-
-    mfem::DenseMatrix A(12);
-    A = 0.0;
-    mfem::Vector rhs(12), affine_coeffs(12);
-    for (int i = 0; i < 4; i++)
-    {
-      A(3*i,0) = A(3*i+1,4) = A(3*i+2, 8)  = donor_pts[i][0];
-      A(3*i,1) = A(3*i+1,5) = A(3*i+2, 9)  = donor_pts[i][1];
-      A(3*i,2) = A(3*i+1,6) = A(3*i+2, 10) = donor_pts[i][2];
-      A(3*i,3) = A(3*i+1,7) = A(3*i+2, 11) = 1.0;
-      rhs[3*i+0] = receiver_pts[i][0];
-      rhs[3*i+1] = receiver_pts[i][1];
-      rhs[3*i+2] = receiver_pts[i][2];
-    }
-    Mpi::Print("Donor pts matrix:\n");
-    A.Print();
-    Mpi::Print("Receiver pts RHS:\n");
-    rhs.Print();
-    A.Invert(); // Invert in place
-    // coeffs = A^-1 rhs
-    A.Mult(rhs, affine_coeffs);
-    Mpi::Print("affine coeffs:\n");
-    affine_coeffs.Print();
-    // Build affine transformation matrix
-    transformation = 0.0;
-    for (int i = 0; i < 3; i++)
-    {
-      for (int j = 0; j < 4; j++)
-      {
-        transformation(i,j) = affine_coeffs[i*4+j];
-      }
-    }
-    transformation(3,3) = 1.0;
-    Mpi::Print("Affine transform matrix:\n");
-    transformation.Print();
+  // Use 4 point pairs (donor, receiver) to compute the affine
+  // transformation matrix
+  mfem::DenseMatrix A(12);
+  A = 0.0;
+  mfem::Vector rhs(12), affine_coeffs(12);
+  for (int i = 0; i < 4; i++)
+  {
+    A(3*i,0) = A(3*i+1,4) = A(3*i+2, 8)  = donor_pts[i][0];
+    A(3*i,1) = A(3*i+1,5) = A(3*i+2, 9)  = donor_pts[i][1];
+    A(3*i,2) = A(3*i+1,6) = A(3*i+2, 10) = donor_pts[i][2];
+    A(3*i,3) = A(3*i+1,7) = A(3*i+2, 11) = 1.0;
+    rhs[3*i+0] = receiver_pts[i][0];
+    rhs[3*i+1] = receiver_pts[i][1];
+    rhs[3*i+2] = receiver_pts[i][2];
+  }
+  Mpi::Print("Donor pts matrix:\n");
+  A.Print();
+  Mpi::Print("Receiver pts RHS:\n");
+  rhs.Print();
+  A.Invert(); // Invert in place
+  // coeffs = A^-1 rhs
+  A.Mult(rhs, affine_coeffs);
+  Mpi::Print("affine coeffs:\n");
+  affine_coeffs.Print();
+  // Build affine transformation matrix
+  transformation = 0.0;
+  for (int i = 0; i < 3; i++)
+  {
+    for (int j = 0; j < 4; j++)
+    {
+      transformation(i,j) = affine_coeffs[i*4+j];
+    }
+  }
+  transformation(3,3) = 1.0;
+  Mpi::Print("Affine transform matrix:\n");
+  transformation.Print();
 }
 
 void ComputeRotation(const mfem::Vector &normal1,
                      const mfem::Vector &normal2,
                      mfem::DenseMatrix &transformation)
 {
+  // Calculate the rotation matrix between two vectors
   mfem::DenseMatrix R(3), vx(3), vx2(3);
 
   mfem::Vector v(normal1.Size());
@@ -2079,25 +2166,14 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
   if (!boundaries.periodic.empty())
   {
     auto periodic_mesh = std::move(mesh);
+
     for (const auto &data : boundaries.periodic)
     {
-
-      // If translation or affine transform is provided in config
-      // file, we use those.
-      // If only translation is provided -> use it
-      // If only affine transfomr is provided -> use it
-      // If both affine transform and translation are provided -> error or warning?
-      // If neither -> automatic detection
-
-      // Compute the transformation between donor and receiver boundaries.
+      // Identify donor and receiver vertices
       const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
+      double norm_tol = 1e-6; //?
       const int sdim = periodic_mesh->SpaceDimension();
-      mfem::Vector coord(sdim), donor_centroid(sdim), receiver_centroid(sdim);
-      mfem::Vector translation2(sdim);
-
-      // test
-      mfem::Vector donor_normal(sdim), receiver_normal(sdim);
-      donor_normal = receiver_normal = 0.0;
+      mfem::Vector coord(sdim);
       std::unordered_set<int> bdr_v_donor, bdr_v_receiver;
       std::unordered_set<int> bdr_e_donor, bdr_e_receiver;
       for (int be = 0; be < periodic_mesh->GetNBE(); be++)
@@ -2107,6 +2183,7 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
         auto receiver = std::find(ra.begin(), ra.end(), attr) != ra.end();
         if (donor || receiver)
         {
+          //Mpi::Print("attr: {:d}, donor: {:d}, receiver: {:d}\n", attr, donor, receiver);
           if (donor) bdr_e_donor.insert(be);
           if (receiver) bdr_e_receiver.insert(be);
           mfem::Array<int> vertidxs;
@@ -2118,73 +2195,113 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
           for (int i = 0; i < vertidxs.Size(); i++)
           {
             coord = periodic_mesh->GetVertex(vertidxs[i]);
-            if (donor)
-            {
-              bdr_v_donor.insert(vertidxs[i]);
-            }
-            else if (receiver)
-            {
-              bdr_v_receiver.insert(vertidxs[i]);
-            }
+            if (donor) bdr_v_donor.insert(vertidxs[i]);
+            else if (receiver) bdr_v_receiver.insert(vertidxs[i]);
           }
         }
       }
-      double donor_dia, receiver_dia, diameter;
-      Mpi::Print("num donor/receiver pts {:d}, {:d}\n",bdr_v_donor.size(), bdr_v_receiver.size());
-      MFEM_VERIFY(bdr_v_donor.size() == bdr_v_receiver.size(), "Different number of vertices on donor and receiver boundaries. Cannot create periodic mesh.");
-      ComputeCentroid(periodic_mesh, bdr_v_donor, donor_centroid, donor_dia);
-      Mpi::Print("Donor centroid: {:.3e}, {:.3e}, {:.3e}\n", donor_centroid[0], donor_centroid[1], donor_centroid[2]);
-      ComputeCentroid(periodic_mesh, bdr_v_receiver, receiver_centroid, receiver_dia);
-      Mpi::Print("Receiver centroid: {:.3e}, {:.3e}, {:.3e}\n", receiver_centroid[0], receiver_centroid[1], receiver_centroid[2]);
-      translation2 = receiver_centroid;
-      translation2 -= donor_centroid;
-      Mpi::Print("computed translation: {:.9e}, {:.9e}, {:.9e}\n", translation2[0], translation2[1], translation2[2]);
-      Mpi::Print("config translation: {:.9e}, {:.9e}, {:.9e}\n", data.translation[0], data.translation[1], data.translation[2]);
-
-      diameter = std::max(donor_dia, receiver_dia);
-      const double norm_tol = 1e-6 * diameter;
-      // Compute normal so it points inside domain for donor and outside for receiver
-      ComputeNormal(periodic_mesh, *bdr_e_donor.begin(), donor_normal, true, norm_tol);
-      ComputeNormal(periodic_mesh, *bdr_e_receiver.begin(), receiver_normal, false, norm_tol);
-      Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0], donor_normal[1], donor_normal[2]);
-      Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0], receiver_normal[1], receiver_normal[2]);
-
-      // Should we add check somewhere that the boundary surfaces are planar??
-      // Maybe in the ComputeNormal function?
-
-      std::vector<mfem::Vector> donor_pts, receiver_pts;
-      FindUniquePoints(periodic_mesh, bdr_v_donor, donor_centroid, diameter, donor_pts, norm_tol);
-      FindUniquePoints(periodic_mesh, bdr_v_receiver, receiver_centroid, diameter, receiver_pts, norm_tol);
-
-      // Add point offset from centroid in normal direction
-      donor_centroid += donor_normal;
-      receiver_centroid += receiver_normal;
-      donor_pts.push_back(donor_centroid);
-      receiver_pts.push_back(receiver_centroid);
-
-      Mpi::Print("Number of unique donor pts: {:d}\n", donor_pts.size());
-      Mpi::Print("Number of unique receiver pts: {:d}\n", receiver_pts.size());
-
-      MFEM_VERIFY(donor_pts.size() == receiver_pts.size(), "Different number of unique points on donor and receiver boundaries.");
 
       mfem::DenseMatrix transformation(4);
-      if(donor_pts.size() == 4)
+      // If only translation is provided -> use it
+      // If only affine transfomr is provided -> use it
+      // If both affine transform and translation are provided -> error or warning?
+      // If neither -> automatic detection
+      mfem::Vector translation(data.translation.size());
+      std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
+      mfem::Vector affine_vec(data.affine_transform.size());
+      std::copy(data.affine_transform.begin(), data.affine_transform.end(), affine_vec.GetData());
+
+      if (translation.Norml2() > 1e-12) // which value to use?
+      {
+        // use user-provided translation
+        for (int i = 0; i < 3; i++)
+        {
+          transformation(i,i) = 1.0;
+          transformation(i,3) = translation[i];
+        }
+        transformation(3,3) = 1.0;
+      }
+      else if (affine_vec.Norml2() > 1e-12) // which value to use?
       {
-        ComputeAffineTransformation(donor_pts, receiver_pts,
-                                    transformation);
+        // use affine transformation matrix
+        for (int i = 0; i < 4; i++)
+        {
+          for (int j = 0; j < 4; j++)
+          {
+            transformation(i,j) = affine_vec[i*4+j];
+          }
+        }
       }
-      else if (donor_pts.size() == 2)
+      else
       {
-        // Use normals to compute a rotation matrix
-        ComputeRotation(donor_normal, receiver_normal, transformation);
+        // automatically detect transformation
+        mfem::Vector donor_centroid(sdim), receiver_centroid(sdim);
+        mfem::Vector translation2(sdim);
+        mfem::Vector donor_normal(sdim), receiver_normal(sdim);
+        donor_normal = receiver_normal = 0.0;
+
+        double donor_dia, receiver_dia, diameter;
+        Mpi::Print("num donor/receiver pts {:d}, {:d}\n",bdr_v_donor.size(), bdr_v_receiver.size());
+        MFEM_VERIFY(bdr_v_donor.size() == bdr_v_receiver.size(), "Different number of vertices on donor and receiver boundaries. Cannot create periodic mesh.");
+        ComputeCentroid(periodic_mesh, bdr_v_donor, donor_centroid, donor_dia);
+        Mpi::Print("Donor centroid: {:.3e}, {:.3e}, {:.3e}\n", donor_centroid[0], donor_centroid[1], donor_centroid[2]);
+        ComputeCentroid(periodic_mesh, bdr_v_receiver, receiver_centroid, receiver_dia);
+        Mpi::Print("Receiver centroid: {:.3e}, {:.3e}, {:.3e}\n", receiver_centroid[0], receiver_centroid[1], receiver_centroid[2]);
+        translation2 = receiver_centroid;
+        translation2 -= donor_centroid;
+        Mpi::Print("computed translation: {:.9e}, {:.9e}, {:.9e}\n", translation2[0], translation2[1], translation2[2]);
+        Mpi::Print("config translation: {:.9e}, {:.9e}, {:.9e}\n", data.translation[0], data.translation[1], data.translation[2]);
+
+        diameter = std::max(donor_dia, receiver_dia);
+        norm_tol = 1e-6 * diameter;
+        // Compute normal so it points inside domain for donor and outside for receiver
+        //ComputeNormal(periodic_mesh, *bdr_e_donor.begin(), donor_normal, true, norm_tol);
+        //ComputeNormal(periodic_mesh, *bdr_e_receiver.begin(), receiver_normal, false, norm_tol);
+        //Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0], donor_normal[1], donor_normal[2]);
+        //Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0], receiver_normal[1], receiver_normal[2]);
+
+        // This one computes the average normal over the whole boundary
+        // and checks if the boundary is planar
+        // If not planar, error out
+        donor_normal = ComputeNormal2(periodic_mesh, bdr_e_donor, true);
+        receiver_normal = ComputeNormal2(periodic_mesh, bdr_e_receiver, false);
+        Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0], donor_normal[1], donor_normal[2]);
+        Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0], receiver_normal[1], receiver_normal[2]);
+
+        std::vector<mfem::Vector> donor_pts, receiver_pts;
+        FindUniquePoints(periodic_mesh, bdr_v_donor, donor_centroid, diameter, donor_pts, norm_tol);
+        FindUniquePoints(periodic_mesh, bdr_v_receiver, receiver_centroid, diameter, receiver_pts, norm_tol);
+
+        // Add point offset from centroid in normal direction
+        donor_centroid += donor_normal;
+        receiver_centroid += receiver_normal;
+        donor_pts.push_back(donor_centroid);
+        receiver_pts.push_back(receiver_centroid);
+
+        Mpi::Print("Number of unique donor pts: {:d}\n", donor_pts.size());
+        Mpi::Print("Number of unique receiver pts: {:d}\n", receiver_pts.size());
+
+        MFEM_VERIFY(donor_pts.size() == receiver_pts.size(), "Different number of unique points on donor and receiver boundaries.");
+
+        if(donor_pts.size() == 4)
+        {
+          ComputeAffineTransformation(donor_pts, receiver_pts,
+                                      transformation);
+        }
+        else if (donor_pts.size() == 2)
+        {
+          // Use normals to compute a rotation matrix
+          ComputeRotation(donor_normal, receiver_normal, transformation);
+
+          // Add centroids translation to transform matrix
+          transformation(0,3) = translation2[0];
+          transformation(1,3) = translation2[1];
+          transformation(2,3) = translation2[2];
+          transformation(3,3) = 1.0;
+          Mpi::Print("Affine transformation matrix\n");
+          transformation.Print();
+        }
 
-        // Add centroids translation to transform matrix
-        transformation(0,3) = translation2[0];
-        transformation(1,3) = translation2[1];
-        transformation(2,3) = translation2[2];
-        transformation(3,3) = 1.0;
-        Mpi::Print("Affine transformation matrix\n");
-        transformation.Print();
       }
 
       auto periodic_mapping = CreatePeriodicVertexMapping(periodic_mesh,

From 1de268d12214745dd5f448b30ba9c2ae80b00b98 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 1 Nov 2024 23:40:21 +0000
Subject: [PATCH 06/49] Fix some issues with Floquet operators and their use in
 eigensolver

---
 palace/drivers/drivensolver.cpp |  4 +--
 palace/drivers/eigensolver.cpp  | 30 ++++++++++++++++++----
 palace/linalg/slepc.cpp         | 18 ++++++-------
 palace/models/romoperator.cpp   |  4 +--
 palace/models/spaceoperator.cpp | 45 +++++++++++++++++++++++++++------
 palace/models/spaceoperator.hpp |  4 +--
 6 files changed, 77 insertions(+), 28 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index 013c62acc..c8fff175f 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -117,8 +117,8 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega0, Operator::DIAG_ZERO);
-  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>();
-  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>();
+  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   const auto &Curl = space_op.GetCurlMatrix();
 
   // Set up the linear solver and set operators for the first frequency step. The
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 0dc04d09c..ce1300676 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -36,8 +36,10 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   auto K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>();
-  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>();
+  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(1.0, Operator::DIAG_ZERO);
+  A2 = nullptr;//?
   const auto &Curl = space_op.GetCurlMatrix();
   SaveMetadata(space_op.GetNDSpaces());
 
@@ -126,11 +128,29 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
                                           : EigenvalueSolver::ScaleType::NONE;
   if (C)
   {
-    eigen->SetOperators(*K, *C, *M, scale);
+    if (P1 && P2)
+    {
+      Mpi::Print("Eigen set operators K, C, M, P1, P2\n");
+      eigen->SetOperators(*K, *C, *M, *P1, *P2, scale);
+    }
+    else
+    {
+      Mpi::Print("Eigen set operators K, C, M\n");
+      eigen->SetOperators(*K, *C, *M, scale);
+    }
   }
   else
   {
-    eigen->SetOperators(*K, *M, scale);
+    if (P1 && P2)
+    {
+      Mpi::Print("Eigen set operators K, M, P1, P2\n");
+      eigen->SetOperators(*K, *M, *P1, *P2, scale);
+    }
+    else
+    {
+      Mpi::Print("Eigen set operators K, M\n");
+      eigen->SetOperators(*K, *M, scale);
+    }
   }
   eigen->SetNumModes(iodata.solver.eigenmode.n, iodata.solver.eigenmode.max_size);
   eigen->SetTol(iodata.solver.eigenmode.tol);
@@ -244,7 +264,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * target,
                                     std::complex<double>(-target * target, 0.0), 1.0i, -1.0i, K.get(),
-                                    C.get(), M.get(), P1.get(), P2.get());
+                                    C.get(), M.get(), A2.get(), P1.get(), P2.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, target, -target * target,
                                                              target, 1.0, -1.0);
   auto ksp = std::make_unique<ComplexKspSolver>(iodata, space_op.GetNDSpaces(),
diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index 23aa9c0a8..16b97f5bd 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -1563,7 +1563,7 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc apply_EPS_A0\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
   if (ctx->opP1)
@@ -1586,7 +1586,7 @@ PetscErrorCode __mat_apply_EPS_A1(Mat A, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc apply_EPS_A1\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opM->Mult(ctx->x1, ctx->y1);
   ctx->y1 *= ctx->delta * ctx->gamma;
@@ -1601,7 +1601,7 @@ PetscErrorCode __mat_apply_EPS_B(Mat A, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc apply_EPS_B\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real());
   ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag());
@@ -1620,7 +1620,7 @@ PetscErrorCode __pc_apply_EPS(PC pc, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(PCShellGetContext(pc, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc pc_apply_EPS\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opInv->Mult(ctx->x1, ctx->y1);
   if (!ctx->sinvert)
@@ -1780,7 +1780,7 @@ PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc apply_PEP_A0\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
   if (ctx->opP1)
@@ -1802,7 +1802,7 @@ PetscErrorCode __mat_apply_PEP_A1(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc apply_PEP_A1\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opC->Mult(ctx->x1, ctx->y1);
   PetscCall(ToPetscVec(ctx->y1, y));
@@ -1816,7 +1816,7 @@ PetscErrorCode __mat_apply_PEP_A2(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc apply_PEP_A2\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opM->Mult(ctx->x1, ctx->y1);
   PetscCall(ToPetscVec(ctx->y1, y));
@@ -1830,7 +1830,7 @@ PetscErrorCode __mat_apply_PEP_B(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc apply_PEP_B\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real());
   ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag());
@@ -1849,7 +1849,7 @@ PetscErrorCode __pc_apply_PEP(PC pc, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(PCShellGetContext(pc, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!");
-
+  palace::Mpi::Print("SLEPc pc_apply_PEP\n");
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opInv->Mult(ctx->x1, ctx->y1);
   if (!ctx->sinvert)
diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp
index ea07e25c5..66a82b606 100644
--- a/palace/models/romoperator.cpp
+++ b/palace/models/romoperator.cpp
@@ -196,8 +196,8 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &space_op, int max_
   K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>();
-  P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>();
+  P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   MFEM_VERIFY(K && M, "Invalid empty HDM matrices when constructing PROM!");
 
   // Set up RHS vector (linear in frequency part) for the incident field at port boundaries,
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index ca2e2a974..01958a5c3 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -461,7 +461,7 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
 // Move some of this inside AssembleOperator(s)? AssembleMixedOperator(s)?
 template <typename OperType>
 std::unique_ptr<OperType>
-SpaceOperator::GetPeriodicWeakCurlMatrix()
+SpaceOperator::GetPeriodicWeakCurlMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
   MaterialPropertyCoefficient fpw(mat_op.MaxCeedAttribute());
@@ -483,6 +483,20 @@ SpaceOperator::GetPeriodicWeakCurlMatrix()
   //}
   //auto weakCurl = a.Assemble(skip_zeros);
   auto weakCurl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpw, nullptr, skip_zeros);
+
+  if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  {
+    auto WeakCurl = std::make_unique<ComplexParOperator>(std::move(weakCurl), nullptr, GetNDSpace());
+    WeakCurl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return WeakCurl;
+  }
+  else
+  {
+    auto WeakCurl = std::make_unique<ParOperator>(std::move(weakCurl), GetNDSpace());
+    WeakCurl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return WeakCurl;
+  }
+/*
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto WeakCurl = std::make_unique<ComplexParOperator>(std::move(weakCurl),nullptr, GetNDSpace(), GetNDSpace(),false);
@@ -493,11 +507,12 @@ SpaceOperator::GetPeriodicWeakCurlMatrix()
     auto WeakCurl = std::make_unique<ParOperator>(std::move(weakCurl),GetNDSpace(), GetNDSpace(), false);
     return WeakCurl;
   }
+  */
 }
 
 template <typename OperType>
 std::unique_ptr<OperType>
-SpaceOperator::GetPeriodicCurlMatrix()
+SpaceOperator::GetPeriodicCurlMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
   MaterialPropertyCoefficient fw(mat_op.MaxCeedAttribute());
@@ -520,6 +535,19 @@ SpaceOperator::GetPeriodicCurlMatrix()
   //auto curl = a.Assemble(skip_zeros);
   auto curl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fw, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  {
+    auto Curl = std::make_unique<ComplexParOperator>(std::move(curl), nullptr, GetNDSpace());
+    Curl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return Curl;
+  }
+  else
+  {
+    auto Curl = std::make_unique<ParOperator>(std::move(curl), GetNDSpace());
+    Curl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return Curl;
+  }
+  /*
+  if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto Curl = std::make_unique<ComplexParOperator>(std::move(curl),nullptr, GetNDSpace(), GetNDSpace(),false);
     return Curl;
@@ -529,6 +557,7 @@ SpaceOperator::GetPeriodicCurlMatrix()
     auto Curl = std::make_unique<ParOperator>(std::move(curl),GetNDSpace(), GetNDSpace(), false);
     return Curl;
   }
+  */
 }
 
 namespace
@@ -556,11 +585,11 @@ auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
   {
     sum->AddOperator(A2->LocalOperator(), 1.0);
   }
-  if (P1)
+  if (P1 && a4 != 0.0)
   {
     sum->AddOperator(P1->LocalOperator(), a4);
   }
-  if (P2)
+  if (P2 && a5 != 0.0)
   {
     sum->AddOperator(P2->LocalOperator(), a5);
   }
@@ -1226,13 +1255,13 @@ template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double, double, double);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetPeriodicWeakCurlMatrix<Operator>();
+SpaceOperator::GetPeriodicWeakCurlMatrix<Operator>(Operator::DiagonalPolicy);
 template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPeriodicWeakCurlMatrix<ComplexOperator>();
+SpaceOperator::GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DiagonalPolicy);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetPeriodicCurlMatrix<Operator>();
+SpaceOperator::GetPeriodicCurlMatrix<Operator>(Operator::DiagonalPolicy);
 template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPeriodicCurlMatrix<ComplexOperator>();
+SpaceOperator::GetPeriodicCurlMatrix<ComplexOperator>(Operator::DiagonalPolicy);
 
 }  // namespace palace
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 6e84873d2..8e23239eb 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -151,9 +151,9 @@ class SpaceOperator
   std::unique_ptr<OperType> GetExtraSystemMatrix(double omega,
                                                  Operator::DiagonalPolicy diag_policy);
   template <typename OperType>
-  std::unique_ptr<OperType> GetPeriodicWeakCurlMatrix();
+  std::unique_ptr<OperType> GetPeriodicWeakCurlMatrix(Operator::DiagonalPolicy diag_policy);
   template <typename OperType>
-  std::unique_ptr<OperType> GetPeriodicCurlMatrix();
+  std::unique_ptr<OperType> GetPeriodicCurlMatrix(Operator::DiagonalPolicy diag_policy);
 
   // Construct the complete frequency or time domain system matrix using the provided
   // stiffness, damping, mass, and extra matrices:

From 95502e2d869a65ac181b30bbaab424b2187c4ca0 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 1 Nov 2024 23:48:16 +0000
Subject: [PATCH 07/49] Remove some prints

---
 palace/drivers/eigensolver.cpp |  4 ----
 palace/linalg/slepc.cpp        | 18 +++++++++---------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index ce1300676..ee00378a1 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -130,12 +130,10 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   {
     if (P1 && P2)
     {
-      Mpi::Print("Eigen set operators K, C, M, P1, P2\n");
       eigen->SetOperators(*K, *C, *M, *P1, *P2, scale);
     }
     else
     {
-      Mpi::Print("Eigen set operators K, C, M\n");
       eigen->SetOperators(*K, *C, *M, scale);
     }
   }
@@ -143,12 +141,10 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   {
     if (P1 && P2)
     {
-      Mpi::Print("Eigen set operators K, M, P1, P2\n");
       eigen->SetOperators(*K, *M, *P1, *P2, scale);
     }
     else
     {
-      Mpi::Print("Eigen set operators K, M\n");
       eigen->SetOperators(*K, *M, scale);
     }
   }
diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index 16b97f5bd..23aa9c0a8 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -1563,7 +1563,7 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  palace::Mpi::Print("SLEPc apply_EPS_A0\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
   if (ctx->opP1)
@@ -1586,7 +1586,7 @@ PetscErrorCode __mat_apply_EPS_A1(Mat A, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  palace::Mpi::Print("SLEPc apply_EPS_A1\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opM->Mult(ctx->x1, ctx->y1);
   ctx->y1 *= ctx->delta * ctx->gamma;
@@ -1601,7 +1601,7 @@ PetscErrorCode __mat_apply_EPS_B(Mat A, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  palace::Mpi::Print("SLEPc apply_EPS_B\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real());
   ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag());
@@ -1620,7 +1620,7 @@ PetscErrorCode __pc_apply_EPS(PC pc, Vec x, Vec y)
   palace::slepc::SlepcEPSSolver *ctx;
   PetscCall(PCShellGetContext(pc, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!");
-  palace::Mpi::Print("SLEPc pc_apply_EPS\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opInv->Mult(ctx->x1, ctx->y1);
   if (!ctx->sinvert)
@@ -1780,7 +1780,7 @@ PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  palace::Mpi::Print("SLEPc apply_PEP_A0\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
   if (ctx->opP1)
@@ -1802,7 +1802,7 @@ PetscErrorCode __mat_apply_PEP_A1(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  palace::Mpi::Print("SLEPc apply_PEP_A1\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opC->Mult(ctx->x1, ctx->y1);
   PetscCall(ToPetscVec(ctx->y1, y));
@@ -1816,7 +1816,7 @@ PetscErrorCode __mat_apply_PEP_A2(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  palace::Mpi::Print("SLEPc apply_PEP_A2\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opM->Mult(ctx->x1, ctx->y1);
   PetscCall(ToPetscVec(ctx->y1, y));
@@ -1830,7 +1830,7 @@ PetscErrorCode __mat_apply_PEP_B(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  palace::Mpi::Print("SLEPc apply_PEP_B\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real());
   ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag());
@@ -1849,7 +1849,7 @@ PetscErrorCode __pc_apply_PEP(PC pc, Vec x, Vec y)
   palace::slepc::SlepcPEPSolver *ctx;
   PetscCall(PCShellGetContext(pc, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!");
-  palace::Mpi::Print("SLEPc pc_apply_PEP\n");
+
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opInv->Mult(ctx->x1, ctx->y1);
   if (!ctx->sinvert)

From 6021e8f07ba0f985364cbb27eeca19926fb58f07 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 7 Nov 2024 18:07:17 -0800
Subject: [PATCH 08/49] Add a new matrix for the mass periodic term

---
 palace/drivers/drivensolver.cpp            |  13 +-
 palace/drivers/eigensolver.cpp             |  17 ++-
 palace/linalg/arpack.cpp                   |  30 +++-
 palace/linalg/arpack.hpp                   |  20 +--
 palace/linalg/eps.hpp                      |   7 +-
 palace/linalg/slepc.cpp                    |  53 +++++--
 palace/linalg/slepc.hpp                    |  24 +--
 palace/models/periodicboundaryoperator.cpp |   7 +-
 palace/models/romoperator.cpp              |  16 +-
 palace/models/romoperator.hpp              |   4 +-
 palace/models/spaceoperator.cpp            | 162 +++++++++++++++------
 palace/models/spaceoperator.hpp            |  12 +-
 palace/models/timeoperator.cpp             |   2 +-
 palace/utils/geodata.cpp                   |  81 ++++++++++-
 palace/utils/iodata.cpp                    |   8 +
 15 files changed, 334 insertions(+), 122 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index c8fff175f..b5b3325b7 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -117,6 +117,7 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega0, Operator::DIAG_ZERO);
+  auto MP = space_op.GetPeriodicMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   const auto &Curl = space_op.GetCurlMatrix();
@@ -125,10 +126,10 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   // preconditioner for the complex linear system is constructed from a real approximation
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega0,
-                                    std::complex<double>(-omega0 * omega0, 0.0), 1.0i, -1.0i, K.get(),
-                                    C.get(), M.get(), A2.get(), P1.get(), P2.get());
+                                    std::complex<double>(-omega0 * omega0, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
+                                    C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega0, -omega0 * omega0,
-                                                             omega0, 1.0, -1.0);
+                                                             omega0, 1.0, 1.0, -1.0);
 
   ComplexKspSolver ksp(iodata, space_op.GetNDSpaces(), &space_op.GetH1Spaces());
   ksp.SetOperators(*A, *P);
@@ -165,10 +166,10 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
       // Update frequency-dependent excitation and operators.
       A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega, Operator::DIAG_ZERO);
       A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega,
-                                   std::complex<double>(-omega * omega, 0.0), 1.0i, -1.0i, K.get(),
-                                   C.get(), M.get(), A2.get(), P1.get(), P2.get());
+                                   std::complex<double>(-omega * omega, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
+                                   C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
       P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega,
-                                                            omega, 1.0, -1.0);
+                                                            omega, 1.0, 1.0, -1.0);
       ksp.SetOperators(*A, *P);
     }
     space_op.GetExcitationVector(omega, RHS);
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index ee00378a1..f70e0d690 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -36,10 +36,11 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   auto K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto MP = space_op.GetPeriodicMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(1.0, Operator::DIAG_ZERO);
-  A2 = nullptr;//?
+  A2 = nullptr;
   const auto &Curl = space_op.GetCurlMatrix();
   SaveMetadata(space_op.GetNDSpaces());
 
@@ -128,9 +129,9 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
                                           : EigenvalueSolver::ScaleType::NONE;
   if (C)
   {
-    if (P1 && P2)
+    if (MP && P1 && P2)
     {
-      eigen->SetOperators(*K, *C, *M, *P1, *P2, scale);
+      eigen->SetOperators(*K, *C, *M, *MP, *P1, *P2, scale);
     }
     else
     {
@@ -139,9 +140,9 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   }
   else
   {
-    if (P1 && P2)
+    if (MP && P1 && P2)
     {
-      eigen->SetOperators(*K, *M, *P1, *P2, scale);
+      eigen->SetOperators(*K, *M, *MP, *P1, *P2, scale);
     }
     else
     {
@@ -259,10 +260,10 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // preconditioner for complex linear systems is constructed from a real approximation
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * target,
-                                    std::complex<double>(-target * target, 0.0), 1.0i, -1.0i, K.get(),
-                                    C.get(), M.get(), A2.get(), P1.get(), P2.get());
+                                    std::complex<double>(-target * target, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
+                                    C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, target, -target * target,
-                                                             target, 1.0, -1.0);
+                                                             target, 1.0, 1.0, -1.0);
   auto ksp = std::make_unique<ComplexKspSolver>(iodata, space_op.GetNDSpaces(),
                                                 &space_op.GetH1Spaces());
   ksp->SetOperators(*A, *P);
diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp
index 16d69bf79..ee479be4d 100644
--- a/palace/linalg/arpack.cpp
+++ b/palace/linalg/arpack.cpp
@@ -203,6 +203,7 @@ void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
 
 void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
                                           const ComplexOperator &M,
+                                          const ComplexOperator &MP,
                                           const ComplexOperator &P1,
                                           const ComplexOperator &P2,
                                           EigenvalueSolver::ScaleType type)
@@ -213,6 +214,7 @@ void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
 void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
                                           const ComplexOperator &C,
                                           const ComplexOperator &M,
+                                          const ComplexOperator &MP,
                                           const ComplexOperator &P1,
                                           const ComplexOperator &P2,
                                           EigenvalueSolver::ScaleType type)
@@ -509,7 +511,7 @@ void ArpackEigenvalueSolver::RescaleEigenvectors(int num_eig)
 ArpackEPSSolver::ArpackEPSSolver(MPI_Comm comm, int print)
   : ArpackEigenvalueSolver(comm, print)
 {
-  opK = opM = opP1 = opP2 = nullptr;
+  opK = opM = opMP = opP1 = opP2 = nullptr;
   normK = normM = 0.0;
 }
 
@@ -543,13 +545,14 @@ void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
 }
 
 void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                   const ComplexOperator &P1, const ComplexOperator &P2,
+                                   const ComplexOperator &MP, const ComplexOperator &P1, const ComplexOperator &P2,
                                    EigenvalueSolver::ScaleType type)
 {
   MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
   bool first = (opK == nullptr);
   opK = &K;
   opM = &M;
+  opMP = &MP;
   opP1 = &P1;
   opP2 = &P2;
   if (first && type != ScaleType::NONE)
@@ -635,6 +638,10 @@ void ArpackEPSSolver::ApplyOp(const std::complex<double> *px,
   if (!sinvert)
   {
     opK->Mult(x1, z1);
+    if (opMP)
+    {
+      opMP->AddMult(x1, z1, 1.0);
+    }
     if (opP1)
     {
       opP1->AddMult(x1, z1, 1.0i);
@@ -677,6 +684,10 @@ double ArpackEPSSolver::GetResidualNorm(std::complex<double> l, const ComplexVec
 {
   // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ.
   opK->Mult(x, r);
+  if (opMP)
+  {
+    opMP->AddMult(x, r, 1.0);
+  }
   if (opP1)
   {
     opP1->AddMult(x, r, 1.0i);
@@ -709,7 +720,7 @@ double ArpackEPSSolver::GetBackwardScaling(std::complex<double> l) const
 ArpackPEPSolver::ArpackPEPSolver(MPI_Comm comm, int print)
   : ArpackEigenvalueSolver(comm, print)
 {
-  opK = opC = opM = opP1 = opP2 = nullptr;
+  opK = opC = opM = opMP = opP1 = opP2 = nullptr;
   normK = normC = normM = 0.0;
 }
 
@@ -751,8 +762,8 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
 }
 
 void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                   const ComplexOperator &M, const ComplexOperator &P1,
-                                   const ComplexOperator &P2,
+                                   const ComplexOperator &M, const ComplexOperator &MP,
+                                   const ComplexOperator &P1, const ComplexOperator &P2,
                                    EigenvalueSolver::ScaleType type)
 {
   MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
@@ -760,6 +771,7 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
   opK = &K;
   opC = &C;
   opM = &M;
+  opMP = &MP;
   opP1 = &P1;
   opP2 = &P2;
   if (first && type != ScaleType::NONE)
@@ -874,6 +886,10 @@ void ArpackPEPSolver::ApplyOp(const std::complex<double> *px,
     }
 
     opK->Mult(x1, z1);
+    if (opMP)
+    {
+      opMP->AddMult(x1, z1, 1.0);
+    }
     if (opP1)
     {
       opP1->AddMult(x1, z1, 1.0i);
@@ -940,6 +956,10 @@ double ArpackPEPSolver::GetResidualNorm(std::complex<double> l, const ComplexVec
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
+  if (opMP)
+  {
+    opMP->AddMult(x, r, 1.0);
+  }
   if (opP1)
   {
     opP1->AddMult(x, r, 1.0i);
diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp
index e725f7bdb..0e058a4e3 100644
--- a/palace/linalg/arpack.hpp
+++ b/palace/linalg/arpack.hpp
@@ -121,11 +121,13 @@ class ArpackEigenvalueSolver : public EigenvalueSolver
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    const ComplexOperator &MP, const ComplexOperator &P1,
+                    const ComplexOperator &P2,
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &P1,
-                    const ComplexOperator &P2, ScaleType type) override;
+                    const ComplexOperator &M, const ComplexOperator &MP,
+                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    ScaleType type) override;
 
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
@@ -187,7 +189,7 @@ class ArpackEPSSolver : public ArpackEigenvalueSolver
 {
 private:
   // References to matrices defining the generalized eigenvalue problem (not owned).
-  const ComplexOperator *opK, *opM, *opP1, *opP2;
+  const ComplexOperator *opK, *opM, *opMP, *opP1, *opP2;
 
   // Operator norms for scaling.
   mutable double normK, normM;
@@ -210,8 +212,8 @@ class ArpackEPSSolver : public ArpackEigenvalueSolver
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &P1, const ComplexOperator &P2,
-                    ScaleType type) override;
+                    const ComplexOperator &MP, const ComplexOperator &P1,
+                    const ComplexOperator &P2, ScaleType type) override;
 
   int Solve() override;
 };
@@ -222,7 +224,7 @@ class ArpackPEPSolver : public ArpackEigenvalueSolver
 private:
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM, *opP1, *opP2;
+  const ComplexOperator *opK, *opC, *opM, *opMP, *opP1, *opP2;
 
   // Operator norms for scaling.
   mutable double normK, normC, normM;
@@ -248,8 +250,8 @@ class ArpackPEPSolver : public ArpackEigenvalueSolver
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &P1,
-                    const ComplexOperator &P2, ScaleType type) override;
+                    const ComplexOperator &M, const ComplexOperator &MP,
+                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
 
   int Solve() override;
 };
diff --git a/palace/linalg/eps.hpp b/palace/linalg/eps.hpp
index 440f9a8b8..a939a00da 100644
--- a/palace/linalg/eps.hpp
+++ b/palace/linalg/eps.hpp
@@ -59,11 +59,12 @@ class EigenvalueSolver
   virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                             const ComplexOperator &M, ScaleType type) = 0;
   virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
+                            const ComplexOperator &MP, const ComplexOperator &P1,
+                            const ComplexOperator &P2, ScaleType type) = 0;
+  virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                            const ComplexOperator &M, const ComplexOperator &MP,
                             const ComplexOperator &P1, const ComplexOperator &P2,
                             ScaleType type) = 0;
-  virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                            const ComplexOperator &M, const ComplexOperator &P1,
-                            const ComplexOperator &P2, ScaleType type) = 0;
 
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index 23aa9c0a8..97bf24f83 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -352,15 +352,15 @@ void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const Complex
 }
 
 void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                         const ComplexOperator &P1, const ComplexOperator &P2,
+                                         const ComplexOperator &MP, const ComplexOperator &P1, const ComplexOperator &P2,
                                          EigenvalueSolver::ScaleType type)
 {
   MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
 }
 
 void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                         const ComplexOperator &M, const ComplexOperator &P1,
-                                         const ComplexOperator &P2,
+                                         const ComplexOperator &M, const ComplexOperator &MP,
+                                         const ComplexOperator &P1, const ComplexOperator &P2,
                                          EigenvalueSolver::ScaleType type)
 {
   MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
@@ -754,7 +754,7 @@ RG SlepcEPSSolverBase::GetRG() const
 SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print, const std::string &prefix)
   : SlepcEPSSolverBase(comm, print, prefix)
 {
-  opK = opM = opP1 = opP2 = nullptr;
+  opK = opM = opMP = opP1 = opP2 = nullptr;
   normK = normM = 0.0;
 }
 
@@ -814,7 +814,7 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
 }
 
 void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                  const ComplexOperator &P1, const ComplexOperator &P2,
+                                  const ComplexOperator &MP, const ComplexOperator &P1, const ComplexOperator &P2,
                                   EigenvalueSolver::ScaleType type)
 {
   // Construct shell matrices for the scaled operators which define the generalized
@@ -822,6 +822,7 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
   const bool first = (opK == nullptr);
   opK = &K;
   opM = &M;
+  opMP = &MP;
   opP1 = &P1;
   opP2 = &P2;
 
@@ -890,6 +891,10 @@ PetscReal SlepcEPSSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
 {
   // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ.
   opK->Mult(x, r);
+  if (opMP)
+  {
+    opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
+  }
   if (opP1)
   {
     opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
@@ -921,7 +926,7 @@ SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print,
                                            const std::string &prefix)
   : SlepcEPSSolverBase(comm, print, prefix)
 {
-  opK = opC = opM = opP1 = opP2 = nullptr;
+  opK = opC = opM = opMP = opP1 = opP2 = nullptr;
   normK = normC = normM = 0.0;
 }
 
@@ -988,8 +993,8 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO
 }
 
 void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                        const ComplexOperator &M, const ComplexOperator &P1,
-                                        const ComplexOperator &P2,
+                                        const ComplexOperator &M, const ComplexOperator &MP,
+                                        const ComplexOperator &P1, const ComplexOperator &P2,
                                         EigenvalueSolver::ScaleType type)
 {
   // Construct shell matrices for the scaled linearized operators which define the block 2x2
@@ -998,6 +1003,7 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO
   opK = &K;
   opC = &C;
   opM = &M;
+  opMP = &MP;
   opP1 = &P1;
   opP2 = &P2;
 
@@ -1102,6 +1108,10 @@ PetscReal SlepcPEPLinearSolver::GetResidualNorm(PetscScalar l, const ComplexVect
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
+  if (opMP)
+  {
+    opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
+  }
   if (opP1)
   {
     opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
@@ -1371,7 +1381,7 @@ RG SlepcPEPSolverBase::GetRG() const
 SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print, const std::string &prefix)
   : SlepcPEPSolverBase(comm, print, prefix)
 {
-  opK = opC = opM = opP1 = opP2 = nullptr;
+  opK = opC = opM = opMP = opP1 = opP2 = nullptr;
   normK = normC = normM = 0.0;
 }
 
@@ -1438,8 +1448,8 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
 }
 
 void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                  const ComplexOperator &M, const ComplexOperator &P1,
-                                  const ComplexOperator &P2,
+                                  const ComplexOperator &M, const ComplexOperator &MP,
+                                  const ComplexOperator &P1, const ComplexOperator &P2,
                                   EigenvalueSolver::ScaleType type)
 {
   // Construct shell matrices for the scaled operators which define the quadratic polynomial
@@ -1448,6 +1458,7 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
   opK = &K;
   opC = &C;
   opM = &M;
+  opMP = &MP;
   opP1 = &P1;
   opP2 = &P2;
 
@@ -1522,6 +1533,10 @@ PetscReal SlepcPEPSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
+  if (opMP)
+  {
+    opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
+  }
   if (opP1)
   {
     opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
@@ -1566,6 +1581,10 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y)
 
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
+  if (ctx->opMP)
+  {
+    ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
+  }
   if (ctx->opP1)
   {
     ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
@@ -1656,6 +1675,10 @@ PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y)
   ctx->opC->Mult(ctx->x2, ctx->y2);
   ctx->y2 *= ctx->gamma;
   ctx->opK->AddMult(ctx->x1, ctx->y2, std::complex<double>(1.0, 0.0));
+  if (ctx->opMP)
+  {
+    ctx->opMP->AddMult(ctx->x1, ctx->y2, std::complex<double>(1.0, 0.0));
+  }
   if (ctx->opP1)
   {
     ctx->opP1->AddMult(ctx->x1, ctx->y2, std::complex<double>(0.0, 1.0));
@@ -1744,6 +1767,10 @@ PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y)
   {
     ctx->y1.AXPBY(-ctx->sigma / (ctx->delta * ctx->gamma), ctx->x2, 0.0);  // Temporarily
     ctx->opK->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
+    if (ctx->opMP)
+    {
+      ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
+    }
     if (ctx->opP1)
     {
       ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
@@ -1783,6 +1810,10 @@ PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y)
 
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
+  if (ctx->opMP)
+  {
+    ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
+  }
   if (ctx->opP1)
   {
     ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp
index 417d7fc7c..7d498e5ed 100644
--- a/palace/linalg/slepc.hpp
+++ b/palace/linalg/slepc.hpp
@@ -135,11 +135,11 @@ class SlepcEigenvalueSolver : public EigenvalueSolver
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &P1, const ComplexOperator &P2,
-                    ScaleType type) override;
-  void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &P1,
+                    const ComplexOperator &MP, const ComplexOperator &P1,
                     const ComplexOperator &P2, ScaleType type) override;
+  void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
+                    const ComplexOperator &M, const ComplexOperator &MP,
+                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
 
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
@@ -263,7 +263,7 @@ class SlepcEPSSolver : public SlepcEPSSolverBase
   using SlepcEigenvalueSolver::sinvert;
 
   // References to matrices defining the generalized eigenvalue problem (not owned).
-  const ComplexOperator *opK, *opM, *opP1, *opP2;
+  const ComplexOperator *opK, *opM, *opMP, *opP1, *opP2;
 
 private:
   // Operator norms for scaling.
@@ -282,7 +282,7 @@ class SlepcEPSSolver : public SlepcEPSSolverBase
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    const ComplexOperator &MP ,const ComplexOperator &P1, const ComplexOperator &P2,
                     ScaleType type) override;
 
   void SetBMat(const Operator &B) override;
@@ -303,7 +303,7 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase
 
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM, *opP1, *opP2;
+  const ComplexOperator *opK, *opC, *opM, *opMP, *opP1, *opP2;
 
   // Workspace vectors for operator applications.
   mutable ComplexVector x2, y2;
@@ -325,8 +325,8 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &P1,
-                    const ComplexOperator &P2, ScaleType type) override;
+                    const ComplexOperator &M, const ComplexOperator &MP,
+                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
 
   void SetBMat(const Operator &B) override;
 
@@ -405,7 +405,7 @@ class SlepcPEPSolver : public SlepcPEPSolverBase
 
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM, *opP1, *opP2;
+  const ComplexOperator *opK, *opC, *opM, *opMP, *opP1, *opP2;
 
 private:
   // Operator norms for scaling.
@@ -424,8 +424,8 @@ class SlepcPEPSolver : public SlepcPEPSolverBase
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &P1,
-                    const ComplexOperator &P2, ScaleType type) override;
+                    const ComplexOperator &M, const ComplexOperator &MP,
+                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
 
   void SetBMat(const Operator &B) override;
 };
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index 216ff2cac..b204ebf38 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -126,7 +126,6 @@ PeriodicBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
 void PeriodicBoundaryOperator::AddRealMassCoefficients(double coeff,
                                                        MaterialPropertyCoefficient &f)
 {
-
   if (non_zero_wave_vector)
   {
     // [k x]^T 1/mu [k x]
@@ -136,7 +135,7 @@ void PeriodicBoundaryOperator::AddRealMassCoefficients(double coeff,
     mfem::DenseTensor kxT(kx.SizeI(), kx.SizeJ(), kx.SizeK());
     for (int k = 0; k < kx.SizeK(); k++)
     {
-      kx(k)  = wave_vector_cross;
+      kx(k) = wave_vector_cross;
       kxT(k).Transpose(wave_vector_cross);
     }
     mfem::DenseTensor kxTmuinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
@@ -151,7 +150,6 @@ void PeriodicBoundaryOperator::AddRealMassCoefficients(double coeff,
 void PeriodicBoundaryOperator::AddWeakCurlCoefficients(double coeff,
                                                        MaterialPropertyCoefficient &f)
 {
-
   if (non_zero_wave_vector)
   {
     // 1/mu [k x]
@@ -160,7 +158,7 @@ void PeriodicBoundaryOperator::AddWeakCurlCoefficients(double coeff,
                          mat_op.GetInvPermeability().SizeK());
     for (int k = 0; k < kx.SizeK(); k++)
     {
-      kx(k)  = wave_vector_cross;
+      kx(k) = wave_vector_cross;
     }
     mfem::DenseTensor muinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
     MaterialPropertyCoefficient muinvkx_func(mat_op.GetAttributeToMaterial(), muinvkx);
@@ -173,7 +171,6 @@ void PeriodicBoundaryOperator::AddWeakCurlCoefficients(double coeff,
 void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
                                                    MaterialPropertyCoefficient &f)
 {
-
   if (non_zero_wave_vector)
   {
     // [k x]^T 1/mu
diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp
index 66a82b606..125cada6e 100644
--- a/palace/models/romoperator.cpp
+++ b/palace/models/romoperator.cpp
@@ -196,6 +196,7 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &space_op, int max_
   K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  MP = space_op.GetPeriodicMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   MFEM_VERIFY(K && M, "Invalid empty HDM matrices when constructing PROM!");
@@ -248,10 +249,10 @@ void RomOperator::SolveHDM(double omega, ComplexVector &u)
   A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega, Operator::DIAG_ZERO);
   has_A2 = (A2 != nullptr);
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega,
-                                    std::complex<double>(-omega * omega, 0.0), 1.0i, -1.0i, K.get(),
-                                    C.get(), M.get(), A2.get(), P1.get(), P2.get());
+                                    std::complex<double>(-omega * omega, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
+                                    C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
   auto P =
-      space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega, omega, 1.0, -1.0);
+      space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega, omega, 1.0, 1.0, -1.0);
   ksp->SetOperators(*A, *P);
 
   // The HDM excitation vector is computed as RHS = iω RHS1 + RHS2(ω).
@@ -316,6 +317,11 @@ void RomOperator::UpdatePROM(double omega, const ComplexVector &u)
   }
   Mr.conservativeResize(dim_V, dim_V);
   ProjectMatInternal(comm, V, *M, Mr, r, dim_V0);
+  if (MP)
+  {
+    MPr.conservativeResize(dim_V, dim_V);
+    ProjectMatInternal(comm, V, *MP, MPr, r, dim_V0);
+  }
   if (P1)
   {
     P1r.conservativeResize(dim_V, dim_V);
@@ -382,6 +388,10 @@ void RomOperator::SolvePROM(double omega, ComplexVector &u)
     Ar += (1i * omega) * Cr;
   }
   Ar += (-omega * omega) * Mr;
+  if (MP)
+  {
+    Ar += MPr;
+  }
   if (P1)
   {
     Ar += 1i * P1r;
diff --git a/palace/models/romoperator.hpp b/palace/models/romoperator.hpp
index 6eecb2547..8cef1e5e6 100644
--- a/palace/models/romoperator.hpp
+++ b/palace/models/romoperator.hpp
@@ -29,7 +29,7 @@ class RomOperator
   SpaceOperator &space_op;
 
   // HDM system matrices and excitation RHS.
-  std::unique_ptr<ComplexOperator> K, M, C, A2, P1, P2;
+  std::unique_ptr<ComplexOperator> K, M, C, A2, MP, P1, P2;
   ComplexVector RHS1, RHS2, r;
   bool has_A2, has_RHS1, has_RHS2;
 
@@ -37,7 +37,7 @@ class RomOperator
   std::unique_ptr<ComplexKspSolver> ksp;
 
   // PROM matrices and vectors.
-  Eigen::MatrixXcd Kr, Mr, Cr, Ar, P1r, P2r;
+  Eigen::MatrixXcd Kr, Mr, Cr, Ar, MPr, P1r, P2r;
   Eigen::VectorXcd RHS1r, RHSr;
 
   // PROM reduced-order basis (real-valued) and active dimension.
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 01958a5c3..7ba963f2b 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -458,7 +458,35 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   }
 }
 
-// Move some of this inside AssembleOperator(s)? AssembleMixedOperator(s)?
+// Assemble the periodic mass-type matrix on the ND space from periodic_op's real mass
+// coefficients (scaled by 1.0). Returns an empty pointer if the coefficient is empty.
+template <typename OperType>
+std::unique_ptr<OperType>
+SpaceOperator::GetPeriodicMassMatrix(Operator::DiagonalPolicy diag_policy)
+{
+  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
+  MaterialPropertyCoefficient mass_coeff(mat_op.MaxCeedAttribute());
+  periodic_op.AddRealMassCoefficients(1.0, mass_coeff);
+  int is_empty = mass_coeff.empty() ? 1 : 0;
+  Mpi::GlobalMin(1, &is_empty, GetComm());  // Presumably min-reduced: 1 only if all ranks empty.
+  if (is_empty != 0) { return {}; }
+  constexpr bool skip_zeros = false;
+  auto local = AssembleOperator(GetNDSpace(), nullptr, &mass_coeff, nullptr, nullptr, nullptr, nullptr, skip_zeros);
+  if constexpr (!std::is_same_v<OperType, ComplexOperator>)
+  {
+    auto par_op = std::make_unique<ParOperator>(std::move(local), GetNDSpace());
+    par_op->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return par_op;
+  }
+  else
+  {
+    // Complex flavor: the assembled operator is the first (real) argument, the second is null.
+    auto par_op = std::make_unique<ComplexParOperator>(std::move(local), nullptr, GetNDSpace());
+    par_op->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return par_op;
+  }
+}
+
 template <typename OperType>
 std::unique_ptr<OperType>
 SpaceOperator::GetPeriodicWeakCurlMatrix(Operator::DiagonalPolicy diag_policy)
@@ -565,8 +593,8 @@ namespace
 
 auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
                          const ParOperator *K, const ParOperator *C, const ParOperator *M,
-                         const ParOperator *A2, double a4, double a5, const ParOperator *P1,
-                         const ParOperator *P2, const FiniteElementSpace &fespace)
+                         const ParOperator *A2, double a4, double a5, double a6, const ParOperator *MP,
+                         const ParOperator *P1, const ParOperator *P2, const FiniteElementSpace &fespace)
 {
   auto sum = std::make_unique<SumOperator>(h, w);
   if (K && a0 != 0.0)
@@ -585,13 +613,17 @@ auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
   {
     sum->AddOperator(A2->LocalOperator(), 1.0);
   }
-  if (P1 && a4 != 0.0)
+  if (MP && a4 != 0.0)
   {
-    sum->AddOperator(P1->LocalOperator(), a4);
+    sum->AddOperator(MP->LocalOperator(), a4);
   }
-  if (P2 && a5 != 0.0)
+  if (P1 && a5 != 0.0)
   {
-    sum->AddOperator(P2->LocalOperator(), a5);
+    sum->AddOperator(P1->LocalOperator(), a5);
+  }
+  if (P2 && a6 != 0.0)
+  {
+    sum->AddOperator(P2->LocalOperator(), a6);
   }
   return std::make_unique<ParOperator>(std::move(sum), fespace);
 }
@@ -600,8 +632,8 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
                          std::complex<double> a2, const ComplexParOperator *K,
                          const ComplexParOperator *C, const ComplexParOperator *M,
                          const ComplexParOperator *A2, std::complex<double> a4,
-                         std::complex<double> a5, const ComplexParOperator *P1,
-                         const ComplexParOperator *P2, const FiniteElementSpace &fespace)
+                         std::complex<double> a5, std::complex<double> a6, const ComplexParOperator *MP,
+                         const ComplexParOperator *P1, const ComplexParOperator *P2, const FiniteElementSpace &fespace)
 {
   // Block 2 x 2 equivalent-real formulation for each term in the sum:
   //                    [ sumr ]  +=  [ ar  -ai ] [ Ar ]
@@ -694,53 +726,78 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
       sumi->AddOperator(*A2->LocalOperator().Imag(), 1.0);
     }
   }
-  if (P1 && a4 != 0.0)
+  if (MP && a4 != 0.0)
   {
     if (a4.real() != 0.0)
+    {
+      if (MP->LocalOperator().Real())
+      {
+        sumr->AddOperator(*MP->LocalOperator().Real(), a4.real());
+      }
+      if (MP->LocalOperator().Imag())
+      {
+        sumi->AddOperator(*MP->LocalOperator().Imag(), a4.real());
+      }
+    }
+    if (a4.imag() != 0.0)
+    {
+      if (MP->LocalOperator().Imag())
+      {
+        sumr->AddOperator(*MP->LocalOperator().Imag(), -a4.imag());
+      }
+      if (MP->LocalOperator().Real())
+      {
+        sumi->AddOperator(*MP->LocalOperator().Real(), a4.imag());
+      }
+    }
+  }
+  if (P1 && a5 != 0.0)
+  {
+    if (a5.real() != 0.0)
     {
       if (P1->LocalOperator().Real())
       {
-        sumr->AddOperator(*P1->LocalOperator().Real(), a4.real());
+        sumr->AddOperator(*P1->LocalOperator().Real(), a5.real());
       }
       if (P1->LocalOperator().Imag())
       {
-        sumi->AddOperator(*P1->LocalOperator().Imag(), a4.real());
+        sumi->AddOperator(*P1->LocalOperator().Imag(), a5.real());
       }
     }
-    if (a4.imag() != 0.0)
+    if (a5.imag() != 0.0)
     {
       if (P1->LocalOperator().Imag())
       {
-        sumr->AddOperator(*P1->LocalOperator().Imag(), -a4.imag());
+        sumr->AddOperator(*P1->LocalOperator().Imag(), -a5.imag());
       }
       if (P1->LocalOperator().Real())
       {
-        sumi->AddOperator(*P1->LocalOperator().Real(), a4.imag());
+        sumi->AddOperator(*P1->LocalOperator().Real(), a5.imag());
       }
     }
   }
-  if (P2 && a5 != 0.0)
+  if (P2 && a6 != 0.0)
   {
-    if (a5.real() != 0.0)
+    if (a6.real() != 0.0)
     {
       if (P2->LocalOperator().Real())
       {
-        sumr->AddOperator(*P2->LocalOperator().Real(), a5.real());
+        sumr->AddOperator(*P2->LocalOperator().Real(), a6.real());
       }
       if (P2->LocalOperator().Imag())
       {
-        sumi->AddOperator(*P2->LocalOperator().Imag(), a5.real());
+        sumi->AddOperator(*P2->LocalOperator().Imag(), a6.real());
       }
     }
-    if (a5.imag() != 0.0)
+    if (a6.imag() != 0.0)
     {
       if (P2->LocalOperator().Imag())
       {
-        sumr->AddOperator(*P2->LocalOperator().Imag(), -a5.imag());
+        sumr->AddOperator(*P2->LocalOperator().Imag(), -a6.imag());
       }
       if (P2->LocalOperator().Real())
       {
-        sumi->AddOperator(*P2->LocalOperator().Real(), a5.imag());
+        sumi->AddOperator(*P2->LocalOperator().Real(), a6.imag());
       }
     }
   }
@@ -752,9 +809,9 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
 template <typename OperType, typename ScalarType>
 std::unique_ptr<OperType>
 SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
-                               ScalarType a4, ScalarType a5,
+                               ScalarType a4, ScalarType a5, ScalarType a6,
                                const OperType *K, const OperType *C, const OperType *M,
-                               const OperType *A2, const OperType *P1, const OperType *P2)
+                               const OperType *A2, const OperType *MP, const OperType *P1, const OperType *P2)
 {
   using ParOperType =
       typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
@@ -764,10 +821,11 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
   const auto *PtAP_C = (C) ? dynamic_cast<const ParOperType *>(C) : nullptr;
   const auto *PtAP_M = (M) ? dynamic_cast<const ParOperType *>(M) : nullptr;
   const auto *PtAP_A2 = (A2) ? dynamic_cast<const ParOperType *>(A2) : nullptr;
+  const auto *PtAP_MP = (MP) ? dynamic_cast<const ParOperType *>(MP) : nullptr;
   const auto *PtAP_P1 = (P1) ? dynamic_cast<const ParOperType *>(P1) : nullptr;
   const auto *PtAP_P2 = (P2) ? dynamic_cast<const ParOperType *>(P2) : nullptr;
   MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2)
-               && (!P1 || PtAP_P1) && (!P2 || PtAP_P2),
+               && (!MP || PtAP_MP) && (!P1 || PtAP_P1) && (!P2 || PtAP_P2),
               "SpaceOperator requires ParOperator or ComplexParOperator for system matrix "
               "construction!");
 
@@ -792,6 +850,11 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
     height = PtAP_A2->LocalOperator().Height();
     width = PtAP_A2->LocalOperator().Width();
   }
+  else if (PtAP_MP)
+  {
+    height = PtAP_MP->LocalOperator().Height();
+    width = PtAP_MP->LocalOperator().Width();
+  }
   else if (PtAP_P1)
   {
     height = PtAP_P1->LocalOperator().Height();
@@ -806,7 +869,7 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
               "At least one argument to GetSystemMatrix must not be empty!");
 
   auto A = BuildParSumOperator(height, width, a0, a1, a2, PtAP_K, PtAP_C, PtAP_M, PtAP_A2,
-                               a4, a5, PtAP_P1, PtAP_P2,
+                               a4, a5, a6, PtAP_MP, PtAP_P1, PtAP_P2,
                                GetNDSpace());
   A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE);
   return A;
@@ -884,7 +947,8 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&br,
 template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, double a1,
                                                                  double a2, double a3,
-                                                                 double a4, double a5)
+                                                                 double a4, double a5,
+                                                                 double a6)
 {
   // XX TODO: Handle complex coeff a0/a1/a2/a3 (like GetSystemMatrix)
 
@@ -911,7 +975,8 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()),
         fbi(mat_op.MaxCeedBdrAttribute()), fpi(mat_op.MaxCeedAttribute()),
         fpwi(mat_op.MaxCeedAttribute()), fpr(mat_op.MaxCeedAttribute()),
-        fpwr(mat_op.MaxCeedAttribute());
+        fpwr(mat_op.MaxCeedAttribute()), fmpr(mat_op.MaxCeedAttribute()),
+        fmpi(mat_op.MaxCeedAttribute());
     AddStiffnessCoefficients(a0, dfr, fr);
     AddStiffnessBdrCoefficients(a0, fbr);
     AddDampingCoefficients(a1, fi);
@@ -920,23 +985,24 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddImagMassCoefficients(a2, fi);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
-    periodic_op.AddWeakCurlCoefficients(a4, fpwi);
-    periodic_op.AddCurlCoefficients(a5, fpi);
+    periodic_op.AddRealMassCoefficients(a4, fmpr);  // Real part only; fmpi is not filled here.
+    periodic_op.AddWeakCurlCoefficients(a5, fpwi);
+    periodic_op.AddCurlCoefficients(a6, fpi);
     int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()
-                     && fpwr.empty() && fpr.empty()),
+                     && fpwr.empty() && fpr.empty() && fmpr.empty()),
                     (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty()
-                     && fpwi.empty() && fpi.empty())};
+                     && fpwi.empty() && fpi.empty() && fmpi.empty())};
     Mpi::GlobalMin(2, empty, GetComm());
     if (!empty[0])
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr, skip_zeros,
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros,
                                  assemble_q_data);
       br_aux_vec =
           AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
     }
     if (!empty[1])
     {
-      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fpwi, &fpi, skip_zeros,
+      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpi, &fpwi, &fpi, skip_zeros,
                                  assemble_q_data);
       bi_aux_vec =
           AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, skip_zeros, assemble_q_data);
@@ -947,7 +1013,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
         fr(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
         fbr(mat_op.MaxCeedBdrAttribute()), fpwr(mat_op.MaxCeedAttribute()),
-        fpr(mat_op.MaxCeedAttribute());
+        fpr(mat_op.MaxCeedAttribute()), fmpr(mat_op.MaxCeedAttribute());
     AddStiffnessCoefficients(a0, dfr, fr);
     AddStiffnessBdrCoefficients(a0, fbr);
     AddDampingCoefficients(a1, fr);
@@ -955,11 +1021,12 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr);
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr);
-    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty());
+    periodic_op.AddRealMassCoefficients(a4, fmpr);  // a4 scales MP, as in the complex branch.
+    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() && fmpr.empty());
     Mpi::GlobalMin(1, &empty, GetComm());
     if (!empty)
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr, skip_zeros,
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros,
                                  assemble_q_data);
       br_aux_vec =
           AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
@@ -1056,7 +1123,7 @@ void SpaceOperator::AddDampingBdrCoefficients(double coeff, MaterialPropertyCoef
 void SpaceOperator::AddRealMassCoefficients(double coeff, MaterialPropertyCoefficient &f)
 {
   f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityReal(), coeff);
-  periodic_op.AddRealMassCoefficients(coeff, f);
+  //periodic_op.AddRealMassCoefficients(coeff, f);//1.0 or coeff?
 }
 
 void SpaceOperator::AddRealMassBdrCoefficients(double coeff,
@@ -1079,7 +1146,7 @@ void SpaceOperator::AddImagMassCoefficients(double coeff, MaterialPropertyCoeffi
 void SpaceOperator::AddAbsMassCoefficients(double coeff, MaterialPropertyCoefficient &f)
 {
   f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityAbs(), coeff);
-  periodic_op.AddRealMassCoefficients(coeff, f);
+  //periodic_op.AddRealMassCoefficients(coeff, f);
 }
 
 void SpaceOperator::AddExtraSystemBdrCoefficients(double omega,
@@ -1239,20 +1306,20 @@ template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetExtraSystemMatrix(double, Operator::DiagonalPolicy);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, double, double, const Operator *,
-                                                 const Operator *, const Operator *,
+SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, double, double, double, const Operator *,
+                                                 const Operator *, const Operator *, const Operator *,
                                                  const Operator *, const Operator *, const Operator *);
 template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetSystemMatrix<ComplexOperator, std::complex<double>>(
     std::complex<double>, std::complex<double>, std::complex<double>,
-    std::complex<double>, std::complex<double>,
-    const ComplexOperator *, const ComplexOperator *, const ComplexOperator *,
+    std::complex<double>, std::complex<double>, std::complex<double>,
+    const ComplexOperator *, const ComplexOperator *, const ComplexOperator *, const ComplexOperator *,
     const ComplexOperator *, const ComplexOperator *, const ComplexOperator *);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double, double, double);
+SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double, double, double, double);
 template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double, double, double);
+SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double, double, double, double);
 
 template std::unique_ptr<Operator>
 SpaceOperator::GetPeriodicWeakCurlMatrix<Operator>(Operator::DiagonalPolicy);
@@ -1264,4 +1331,9 @@ SpaceOperator::GetPeriodicCurlMatrix<Operator>(Operator::DiagonalPolicy);
 template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetPeriodicCurlMatrix<ComplexOperator>(Operator::DiagonalPolicy);
 
+template std::unique_ptr<Operator>
+SpaceOperator::GetPeriodicMassMatrix<Operator>(Operator::DiagonalPolicy);
+template std::unique_ptr<ComplexOperator>
+SpaceOperator::GetPeriodicMassMatrix<ComplexOperator>(Operator::DiagonalPolicy);
+
 }  // namespace palace
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 8e23239eb..6bec1e33d 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -151,20 +151,22 @@ class SpaceOperator
   std::unique_ptr<OperType> GetExtraSystemMatrix(double omega,
                                                  Operator::DiagonalPolicy diag_policy);
   template <typename OperType>
+  std::unique_ptr<OperType> GetPeriodicMassMatrix(Operator::DiagonalPolicy diag_policy);
+  template <typename OperType>
   std::unique_ptr<OperType> GetPeriodicWeakCurlMatrix(Operator::DiagonalPolicy diag_policy);
   template <typename OperType>
   std::unique_ptr<OperType> GetPeriodicCurlMatrix(Operator::DiagonalPolicy diag_policy);
 
   // Construct the complete frequency or time domain system matrix using the provided
   // stiffness, damping, mass, and extra matrices:
-  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 + a4 P1 + a5 P2.
+  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 + a4 MP + a5 P1 + a6 P2.
   // It is assumed that the inputs have been constructed using previous calls to
   // GetSystemMatrix() and the returned operator does not inherit ownership of any of them.
   template <typename OperType, typename ScalarType>
   std::unique_ptr<OperType>
-  GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, ScalarType a4, ScalarType a5, const OperType *K,
+  GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, ScalarType a4, ScalarType a5, ScalarType a6, const OperType *K,
                   const OperType *C, const OperType *M, const OperType *A2 = nullptr,
-                  const OperType *P1 = nullptr, const OperType *P2 = nullptr);
+                  const OperType *MP = nullptr, const OperType *P1 = nullptr, const OperType *P2 = nullptr);
 
   // Construct the real, SPD matrix for weighted L2 or H(curl) inner products:
   //                           B = a0 Kr + a2 Mr .
@@ -179,10 +181,10 @@ class SpaceOperator
   // Construct the matrix for frequency or time domain linear system preconditioning. If it
   // is real-valued (Mr > 0, Mi < 0, |Mr + Mi| is done on the material property coefficient,
   // not the matrix entries themselves):
-  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) + a4 P1 + a5 P2.
+  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) + a4 MP + a5 P1 + a6 P2.
   template <typename OperType>
   std::unique_ptr<OperType> GetPreconditionerMatrix(double a0, double a1, double a2,
-                                                    double a3, double a4=0, double a5=0);
+                                                    double a3, double a4=0, double a5=0, double a6=0);
 
   // Construct and return the discrete curl or gradient matrices.
   const Operator &GetGradMatrix() const
diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp
index f825b051e..e7b6cb22b 100644
--- a/palace/models/timeoperator.cpp
+++ b/palace/models/timeoperator.cpp
@@ -83,7 +83,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera
       {
         // Configure the system matrix and also the matrix (matrices) from which the
         // preconditioner will be constructed.
-        A = space_op.GetSystemMatrix(a0, a1, 1.0, 0.0, 0.0, K.get(), C.get(), M.get());
+        A = space_op.GetSystemMatrix(a0, a1, 1.0, 0.0, 0.0, 0.0, K.get(), C.get(), M.get());
         B = space_op.GetPreconditionerMatrix<Operator>(a0, a1, 1.0, 0.0, 0.0, 0.0);
 
         // Configure the solver.
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index f5f37714f..067b1e0ac 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1943,8 +1943,8 @@ void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
   while (q.size() > 0 and normal.Norml2() < norm_tol)
   {
     coord = mesh->GetVertex(q.top().second);
-    Mpi::Print("pts: {:d}, x/y/z: {:.3e}, {:.3e}, {:.3e}, dist: {:.12e}\n", q.top().second, coord[0], coord[1], coord[2], q.top().first);
-    Mpi::Print("dist2points.size(): {:d}\n",dist2points[std::round(q.top().first/diameter*1e8)].size());
+    Mpi::Print("pts: {:d}, x/y/z: {:.3e}, {:.3e}, {:.3e}, dist: {:d}\n", q.top().second, coord[0], coord[1], coord[2], q.top().first);
+    Mpi::Print("dist2points.size(): {:d}\n",dist2points[q.top().first].size());
     q.pop();
     unique_pts.push_back(coord);
     if (unique_pts.size() == 3)
@@ -1956,7 +1956,7 @@ void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
       v2 = unique_pts[2];
       v2 -= unique_pts[0];
       v1.cross3D(v2, normal);
-      //Mpi::Print("q.size: {:d}, normal.linf: {:.3e}\n", q.size(), normal.Normlinf());
+      Mpi::Print("q.size: {:d}, normal.linf: {:.3e}\n", q.size(), normal.Normlinf());
       if (normal.Norml2() < norm_tol)
       {
         unique_pts.pop_back();
@@ -1965,6 +1965,68 @@ void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
   }
 }
 
+// Compute the rigid-body transformation (rotation R, translation t) which maps the first
+// three donor points onto the first three receiver points and store it in the 4 x 4
+// homogeneous matrix transformation, so that receiver = R * donor + t. Both point sets are
+// taken relative to their first point, and R = V D U^T is built from the SVD H = U S V^T of
+// the covariance H = A * B^T, where D = diag(1, 1, +/-1) enforces det(R) = +1 (Kabsch).
+void ComputeTransformSVD(const std::vector<mfem::Vector> &donor_pts,
+                         const std::vector<mfem::Vector> &receiver_pts,
+                         mfem::DenseMatrix &transformation)
+{
+  MFEM_VERIFY(donor_pts.size() >= 3 && receiver_pts.size() >= 3,
+              "ComputeTransformSVD requires at least three donor and receiver points!");
+  Eigen::MatrixXd A(3, 3), B(3, 3);
+  Eigen::VectorXd cA(3), cB(3);
+  for (int i = 0; i < 3; i++)
+  {
+    for (int d = 0; d < 3; d++)
+    {
+      // Column i holds point i relative to the reference (first) point of its set.
+      A(d, i) = donor_pts[i][d] - donor_pts[0][d];
+      B(d, i) = receiver_pts[i][d] - receiver_pts[0][d];
+    }
+    cA(i) = donor_pts[0][i];
+    cB(i) = receiver_pts[0][i];
+  }
+
+  // Compute covariance matrix and its SVD
+  const Eigen::MatrixXd H = A * B.transpose();
+  Eigen::JacobiSVD<Eigen::MatrixXd> svd(H, Eigen::ComputeFullU | Eigen::ComputeFullV);
+  const Eigen::MatrixXd U = svd.matrixU();
+  Eigen::MatrixXd V = svd.matrixV();
+
+  // Get rotation matrix: V * U^T maps donor onto receiver (U * V^T would be the inverse map).
+  Eigen::MatrixXd R = V * U.transpose();
+
+  // Check determinant: a negative value means a reflection, fixed by flipping the sign of
+  // the last right singular vector (the one associated with the smallest singular value).
+  const double det = R.determinant();
+  if (det < 0.0)
+  {
+    Mpi::Print("Determinant < 0, ({:.3e}), correct R matrix\n", det);
+    V.col(2) *= -1.0;
+    R = V * U.transpose();
+  }
+
+  // Get translation
+  const Eigen::VectorXd t = cB - R * cA;
+
+  // Form affine transformation matrix
+  for (int i = 0; i < 3; i++)
+  {
+    for (int j = 0; j < 3; j++)
+    {
+      transformation(i, j) = R(i, j);
+    }
+    transformation(i, 3) = t(i);
+    transformation(3, i) = 0.0;
+  }
+  transformation(3, 3) = 1.0;
+  Mpi::Print("Affine transformation using 3-pt SVD\n");
+  transformation.Print();
+}
+
 void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
                                  const std::vector<mfem::Vector> &receiver_pts,
                                  mfem::DenseMatrix &transformation)
@@ -2082,7 +2144,7 @@ std::vector<int> CreatePeriodicVertexMapping(
     at.MakeRef(receiver_coord, 0);
 
     coord = mesh->GetVertex(vi);
-    //Mpi::Print("Mapping donor point: {:d} ({:.3e}, {:.3e}, {:.3e})", vi, donor_coord[0], donor_coord[1], donor_coord[2]);
+    Mpi::Print("Mapping donor point: {:d} ({:.3e}, {:.3e}, {:.3e})", vi, donor_coord[0], donor_coord[1], donor_coord[2]);
     // Apply transformation
     // receiver = transform * donor
     transform.Mult(donor_coord, receiver_coord);
@@ -2091,7 +2153,7 @@ std::vector<int> CreatePeriodicVertexMapping(
     coord = mesh->GetVertex(vj);
     dx = at;
     dx -= coord;
-    //Mpi::Print(" to receiver point: {:d} ({:.3e}, {:.3e}, {:.3e}), with transform error {:.3e}\n", vj, receiver_coord[0], receiver_coord[1], receiver_coord[2], dx.Norml2());
+    Mpi::Print(" to receiver point: {:d} ({:.3e}, {:.3e}, {:.3e}), with transform error {:.3e}\n", vj, receiver_coord[0], receiver_coord[1], receiver_coord[2], dx.Norml2());
 
     MFEM_VERIFY(dx.Norml2() < tol, "Could not match points on periodic boundaries, transformed donor point does not correspond to a receive point.");
 
@@ -2287,8 +2349,12 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
         {
           ComputeAffineTransformation(donor_pts, receiver_pts,
                                       transformation);
-        }
-        else if (donor_pts.size() == 2)
+        }/*
+        else if (donor_pts.size() == 3)
+        {
+          ComputeTransformSVD(donor_pts, receiver_pts, transformation);
+        }*/
+        else /*if (donor_pts.size() == 2)*/
         {
           // Use normals to compute a rotation matrix
           ComputeRotation(donor_normal, receiver_normal, transformation);
@@ -2314,6 +2380,7 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       //std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
       //auto periodic_mapping =
       //    periodic_mesh->CreatePeriodicVertexMapping({translation2}, 1E-6);
+      //periodic_mesh->
       auto p_mesh = std::make_unique<mfem::Mesh>(
           mfem::Mesh::MakePeriodic(*periodic_mesh, periodic_mapping));
       periodic_mesh = std::move(p_mesh);
diff --git a/palace/utils/iodata.cpp b/palace/utils/iodata.cpp
index f2c5b6818..1c03c64f2 100644
--- a/palace/utils/iodata.cpp
+++ b/palace/utils/iodata.cpp
@@ -526,6 +526,14 @@ void IoData::NondimensionalizeInputs(mfem::ParMesh &mesh)
     data.Cs /= electromagnetics::epsilon0_ * Lc;
   }
 
+  // Floquet periodic boundaries
+  for (int i = 0; i < boundaries.floquet.wave_vector.size(); i++)
+  {
+    Mpi::Print("Rescaling floquet component from {:.3e}", boundaries.floquet.wave_vector[i]);
+    boundaries.floquet.wave_vector[i] /= 1.0 / Lc;
+    Mpi::Print("to {:.3e}\n", boundaries.floquet.wave_vector[i]);
+  }
+
   // Wave port offset distance.
   for (auto &[idx, data] : boundaries.waveport)
   {

From 0301a7115137c6d18628af1531a266d34a694b39 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 12 Nov 2024 09:55:34 -0800
Subject: [PATCH 09/49] Ensure enough mesh elements in the periodic direction

---
 palace/utils/geodata.cpp | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index 067b1e0ac..ceed118ab 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -2238,6 +2238,7 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       mfem::Vector coord(sdim);
       std::unordered_set<int> bdr_v_donor, bdr_v_receiver;
       std::unordered_set<int> bdr_e_donor, bdr_e_receiver;
+      bool has_tets = false;
       for (int be = 0; be < periodic_mesh->GetNBE(); be++)
       {
         int attr = periodic_mesh->GetBdrAttribute(be);
@@ -2245,6 +2246,12 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
         auto receiver = std::find(ra.begin(), ra.end(), attr) != ra.end();
         if (donor || receiver)
         {
+          int el, info;
+          periodic_mesh->GetBdrElementAdjacentElement(be, el, info);
+          if (periodic_mesh->GetElementType(el) == mfem::Element::TETRAHEDRON)
+          {
+            has_tets = true;
+          }
           //Mpi::Print("attr: {:d}, donor: {:d}, receiver: {:d}\n", attr, donor, receiver);
           if (donor) bdr_e_donor.insert(be);
           if (receiver) bdr_e_receiver.insert(be);
@@ -2262,6 +2269,37 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
           }
         }
       }
+      const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
+      Mpi::Print("Total number of elements: {:d}\n",periodic_mesh->GetNE());
+      Mpi::Print("Number of periodic BC elements: {:d}\n", num_periodic_bc_elems);
+      // How to check if the mesh is OK?
+      // Count number of elems in the periodic direction?
+      // If hex/prism: Count boundary elements on donor+receiver,
+      // if total NE = ndonorE+nReceiverE: not enough cells?
+      // If pure tet mesh NE = 3*(ndonorE+nreceiverE): not enough
+      // Mixed mesh is trickier
+      // MOVE THIS TEST SOMEWHERE ELSE. IT SHOULD ALSO APPLY TO MESHES
+      // ALREADY CREATED WITH PERIODICITY!!!
+      mfem::Array<mfem::Geometry::Type> geoms;
+      periodic_mesh->GetGeometries(3, geoms);
+      if (geoms.Size() == 1 && geoms[0] == mfem::Geometry::TETRAHEDRON)
+      {
+        // Pure tet mesh
+        MFEM_VERIFY(periodic_mesh->GetNE() > 3*num_periodic_bc_elems,
+        "Not enough mesh elements in periodic direction!");
+      }
+      else if (geoms.Size() > 1 && has_tets)
+      {
+        // Mixed mesh
+        MFEM_VERIFY(periodic_mesh->GetNE() > num_periodic_bc_elems,
+        "Not enough mesh elements in periodic direction!");
+      }
+      else
+      {
+        // No tets
+        MFEM_VERIFY(periodic_mesh->GetNE() > num_periodic_bc_elems,
+        "Not enough mesh elements in periodic direction!");
+      }
 
       mfem::DenseMatrix transformation(4);
       // If only translation is provided -> use it

From 63096b3c06885a53b901d216d2ba12c2121f511d Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Wed, 13 Nov 2024 10:08:22 -0800
Subject: [PATCH 10/49] Debugging tests

---
 palace/drivers/eigensolver.cpp             | 20 +++++++--
 palace/linalg/slepc.cpp                    | 25 +++++++++--
 palace/models/periodicboundaryoperator.cpp |  2 +
 palace/models/spaceoperator.cpp            | 51 +++++++++++++---------
 4 files changed, 70 insertions(+), 28 deletions(-)

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index f70e0d690..6398a878e 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -41,6 +41,11 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(1.0, Operator::DIAG_ZERO);
   A2 = nullptr;
+  //test
+  //MP = nullptr;
+  //P1 = nullptr;
+  //P2 = nullptr;
+
   const auto &Curl = space_op.GetCurlMatrix();
   SaveMetadata(space_op.GetNDSpaces());
 
@@ -129,23 +134,27 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
                                           : EigenvalueSolver::ScaleType::NONE;
   if (C)
   {
-    if (MP && P1 && P2)
+    if (MP || P1 || P2)
     {
+      Mpi::Print("Setting eigensolver with K C M MP P1 P2\n");
       eigen->SetOperators(*K, *C, *M, *MP, *P1, *P2, scale);
     }
     else
     {
+      Mpi::Print("Setting eigensolver with K C M\n");
       eigen->SetOperators(*K, *C, *M, scale);
     }
   }
   else
   {
-    if (MP && P1 && P2)
+    if (MP || P1 || P2)
     {
+      Mpi::Print("Setting eigensolver with K M MP P1 P2\n");
       eigen->SetOperators(*K, *M, *MP, *P1, *P2, scale);
     }
     else
     {
+      Mpi::Print("Setting eigensolver with K M\n");
       eigen->SetOperators(*K, *M, scale);
     }
   }
@@ -260,8 +269,11 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // preconditioner for complex linear systems is constructed from a real approximation
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * target,
-                                    std::complex<double>(-target * target, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
-                                    C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
+                                    std::complex<double>(-target * target, 0.0),
+                                    std::complex<double>(1.0, 0.0),
+                                    std::complex<double>(0.0, 1.0),
+                                    std::complex<double>(0.0, -1.0),
+                                    K.get(), C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, target, -target * target,
                                                              target, 1.0, 1.0, -1.0);
   auto ksp = std::make_unique<ComplexKspSolver>(iodata, space_op.GetNDSpaces(),
diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index 97bf24f83..201cca59d 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -893,14 +893,17 @@ PetscReal SlepcEPSSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
   opK->Mult(x, r);
   if (opMP)
   {
+    Mpi::Print("EPS GetResNorm opMP\n");
     opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
   }
   if (opP1)
   {
+    Mpi::Print("EPS GetResNorm opP1\n");
     opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
   }
   if (opP2)
   {
+    Mpi::Print("EPS GetResNorm opP2\n");
     opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
   }
   opM->AddMult(x, r, -l);
@@ -1110,14 +1113,17 @@ PetscReal SlepcPEPLinearSolver::GetResidualNorm(PetscScalar l, const ComplexVect
   opK->Mult(x, r);
   if (opMP)
   {
+    Mpi::Print("PEPLinear GetResNorm opMP\n");
     opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
   }
   if (opP1)
   {
+    Mpi::Print("PEPLinear GetResNorm opP1\n");
     opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
   }
   if (opP2)
   {
+    Mpi::Print("PEPLinear GetResNorm opP2\n");
     opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
   }
   opC->AddMult(x, r, l);
@@ -1535,14 +1541,17 @@ PetscReal SlepcPEPSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
   opK->Mult(x, r);
   if (opMP)
   {
+    Mpi::Print("PEP GetResNorm opMP\n");
     opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
   }
   if (opP1)
   {
+    Mpi::Print("PEP GetResNorm opP1\n");
     opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
   }
   if (opP2)
   {
+    Mpi::Print("PEP GetResNorm opP2\n");
     opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
   }
   opC->AddMult(x, r, l);
@@ -1583,6 +1592,7 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y)
   ctx->opK->Mult(ctx->x1, ctx->y1);
   if (ctx->opMP)
   {
+    std::cerr << "EPS A0 opMP\n";
     ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
   }
   if (ctx->opP1)
@@ -1669,7 +1679,7 @@ PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  std::cerr << "PEPLinear L0\n";
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   ctx->y1 = ctx->x2;
   ctx->opC->Mult(ctx->x2, ctx->y2);
@@ -1677,14 +1687,17 @@ PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y)
   ctx->opK->AddMult(ctx->x1, ctx->y2, std::complex<double>(1.0, 0.0));
   if (ctx->opMP)
   {
+    std::cerr << "PEPLinear L0 opMP\n";
     ctx->opMP->AddMult(ctx->x1, ctx->y2, std::complex<double>(1.0, 0.0));
   }
   if (ctx->opP1)
   {
+    std::cerr << "PEPLinear L0 opP1\n";
     ctx->opP1->AddMult(ctx->x1, ctx->y2, std::complex<double>(0.0, 1.0));
   }
   if (ctx->opP2)
   {
+    std::cerr << "PEPLinear L0 opP2\n";
     ctx->opP2->AddMult(ctx->x1, ctx->y2, std::complex<double>(0.0, -1.0));
   }
   ctx->y2 *= -ctx->delta;
@@ -1701,7 +1714,7 @@ PetscErrorCode __mat_apply_PEPLinear_L1(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  std::cerr << "PEPLinear L1\n";
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   ctx->y1 = ctx->x1;
   ctx->opM->Mult(ctx->x2, ctx->y2);
@@ -1717,7 +1730,7 @@ PetscErrorCode __mat_apply_PEPLinear_B(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-
+  std::cerr << "PEPLinear B\n";
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real());
   ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag());
@@ -1742,7 +1755,7 @@ PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(PCShellGetContext(pc, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!");
-
+  std::cerr << "PEPLinear\n";
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   if (!ctx->sinvert)
   {
@@ -1769,14 +1782,17 @@ PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y)
     ctx->opK->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
     if (ctx->opMP)
     {
+      std::cerr << "PEPLinear opMP\n";
       ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
     }
     if (ctx->opP1)
     {
+      std::cerr << "PEPLinear opP1\n";
       ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
     }
     if (ctx->opP2)
     {
+      std::cerr << "PEPLinear opP2\n";
       ctx->opP2->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, -1.0));
     }
     ctx->opInv->Mult(ctx->y1, ctx->y2);
@@ -1812,6 +1828,7 @@ PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y)
   ctx->opK->Mult(ctx->x1, ctx->y1);
   if (ctx->opMP)
   {
+    std::cerr << "PEP A0 opMP\n";
     ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
   }
   if (ctx->opP1)
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index b204ebf38..d4c3e03be 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -180,8 +180,10 @@ void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
     for (int k = 0; k < kxT.SizeK(); k++)
     {
       kxT(k).Transpose(wave_vector_cross);
+      //kxT(k) = wave_vector_cross;
     }
     mfem::DenseTensor kxTmuinv = linalg::Mult(kxT, mat_op.GetInvPermeability());
+    //mfem::DenseTensor kxTmuinv = linalg::Mult(mat_op.GetInvPermeability(), kxT);
     MaterialPropertyCoefficient kxTmuinv_func(mat_op.GetAttributeToMaterial(), kxTmuinv);
     //muinvkx_func.RestrictCoefficient
     f.AddCoefficient(kxTmuinv_func.GetAttributeToMaterial(),
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 7ba963f2b..dc16bf47a 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -208,6 +208,7 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
                     const MaterialPropertyCoefficient *f,
                     const MaterialPropertyCoefficient *dfb,
                     const MaterialPropertyCoefficient *fb,
+                    const MaterialPropertyCoefficient *fpm,
                     const MaterialPropertyCoefficient *fpw,
                     const MaterialPropertyCoefficient *fp, bool assemble_q_data = false)
 {
@@ -241,6 +242,10 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
       a.AddBoundaryIntegrator<VectorFEMassIntegrator>(*fb);
     }
   }
+  if (fpm && !fpm->empty())
+  {
+    a.AddDomainIntegrator<VectorFEMassIntegrator>(*fpm);
+  }
   if (fpw && !fpw->empty())
   {
     a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(*fpw);
@@ -277,12 +282,13 @@ auto AssembleOperator(const FiniteElementSpace &fespace,
                       const MaterialPropertyCoefficient *f,
                       const MaterialPropertyCoefficient *dfb,
                       const MaterialPropertyCoefficient *fb,
+                      const MaterialPropertyCoefficient *fpm,
                       const MaterialPropertyCoefficient *fpw,
                       const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
                       bool assemble_q_data = false)
 {
   BilinearForm a(fespace);
-  AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
+  AddIntegrators(a, df, f, dfb, fb, fpm, fpw, fp, assemble_q_data);
   return a.Assemble(skip_zeros);
 }
 
@@ -291,12 +297,13 @@ auto AssembleOperators(const FiniteElementSpaceHierarchy &fespaces,
                        const MaterialPropertyCoefficient *f,
                        const MaterialPropertyCoefficient *dfb,
                        const MaterialPropertyCoefficient *fb,
+                       const MaterialPropertyCoefficient *fpm,
                        const MaterialPropertyCoefficient *fpw,
                        const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
                        bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
-  AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
+  AddIntegrators(a, df, f, dfb, fb, fpm, fpw, fp, assemble_q_data);
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
@@ -328,7 +335,7 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr, skip_zeros);
+  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr, nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto K = std::make_unique<ComplexParOperator>(std::move(k), nullptr, GetNDSpace());
@@ -359,7 +366,7 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr, nullptr, skip_zeros);
+  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr, nullptr, nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto C = std::make_unique<ComplexParOperator>(std::move(c), nullptr, GetNDSpace());
@@ -396,11 +403,11 @@ std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy
   std::unique_ptr<Operator> mr, mi;
   if (!empty[0])
   {
-    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr, nullptr, skip_zeros);
+    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr, nullptr, nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr, nullptr, skip_zeros);
+    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr, nullptr, nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -436,11 +443,11 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   std::unique_ptr<Operator> ar, ai;
   if (!empty[0])
   {
-    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr, nullptr, skip_zeros);
+    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr, nullptr, nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr, nullptr, skip_zeros);
+    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr, nullptr, nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -463,16 +470,16 @@ std::unique_ptr<OperType>
 SpaceOperator::GetPeriodicMassMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
-  periodic_op.AddRealMassCoefficients(1.0, f);
-  int empty = (f.empty());
+  MaterialPropertyCoefficient fpm(mat_op.MaxCeedAttribute());
+  periodic_op.AddRealMassCoefficients(1.0, fpm);
+  int empty = (fpm.empty());
   Mpi::GlobalMin(1, &empty, GetComm());
   if (empty)
   {
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto m = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, nullptr, nullptr, nullptr, skip_zeros);
+  auto m = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpm, nullptr, nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto M = std::make_unique<ComplexParOperator>(std::move(m), nullptr, GetNDSpace());
@@ -510,7 +517,7 @@ SpaceOperator::GetPeriodicWeakCurlMatrix(Operator::DiagonalPolicy diag_policy)
   //  a.AssembleQuadratureData();
   //}
   //auto weakCurl = a.Assemble(skip_zeros);
-  auto weakCurl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpw, nullptr, skip_zeros);
+  auto weakCurl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fpw, nullptr, skip_zeros);
 
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -543,9 +550,9 @@ std::unique_ptr<OperType>
 SpaceOperator::GetPeriodicCurlMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fw(mat_op.MaxCeedAttribute());
-  periodic_op.AddCurlCoefficients(1.0, fw);
-  int empty = (fw.empty());
+  MaterialPropertyCoefficient fp(mat_op.MaxCeedAttribute());
+  periodic_op.AddCurlCoefficients(1.0, fp);
+  int empty = (fp.empty());
   Mpi::GlobalMin(1, &empty, GetComm());
   if (empty)
   {
@@ -561,7 +568,7 @@ SpaceOperator::GetPeriodicCurlMatrix(Operator::DiagonalPolicy diag_policy)
   //  a.AssembleQuadratureData();
   //}
   //auto curl = a.Assemble(skip_zeros);
-  auto curl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fw, skip_zeros);
+  auto curl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &fp, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto Curl = std::make_unique<ComplexParOperator>(std::move(curl), nullptr, GetNDSpace());
@@ -967,6 +974,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
   std::vector<std::unique_ptr<Operator>> br_vec(n_levels), bi_vec(n_levels),
       br_aux_vec(n_levels), bi_aux_vec(n_levels);
   constexpr bool skip_zeros = false, assemble_q_data = false;
+  Mpi::Print("GetPreconditioner pc_mat_real: {:d}, pc_mat_shifted: {:d}\n", pc_mat_real, pc_mat_shifted);
   if (std::is_same<OperType, ComplexOperator>::value && !pc_mat_real)
   {
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
@@ -1002,7 +1010,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     }
     if (!empty[1])
     {
-      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpr, &fpwi, &fpi, skip_zeros,
+      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpi, &fpwi, &fpi, skip_zeros,
                                  assemble_q_data);
       bi_aux_vec =
           AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, skip_zeros, assemble_q_data);
@@ -1021,8 +1029,11 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr);
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr);
-    periodic_op.AddRealMassCoefficients(a6, fmpr);
-    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() && fmpr.empty());
+    periodic_op.AddRealMassCoefficients(a4, fmpr);
+    periodic_op.AddWeakCurlCoefficients(a5, fpwr);
+    periodic_op.AddCurlCoefficients(a6, fpr);
+    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
+                 fmpr.empty() && fpwr.empty() && fpr.empty());
     Mpi::GlobalMin(1, &empty, GetComm());
     if (!empty)
     {

From 974da2ad6c5d055174220a46409e73e278e97cee Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 18 Nov 2024 17:58:43 -0800
Subject: [PATCH 11/49] Debugging tests: print periodic operator norms and
 preconditioner branch

---
 palace/linalg/slepc.cpp         | 18 +++++++++++++++---
 palace/linalg/slepc.hpp         |  6 +++---
 palace/models/spaceoperator.cpp |  4 ++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index 201cca59d..2eebaee9b 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -755,7 +755,7 @@ SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print, const std::string &pref
   : SlepcEPSSolverBase(comm, print, prefix)
 {
   opK = opM = opMP = opP1 = opP2 = nullptr;
-  normK = normM = 0.0;
+  normK = normM = normMP = normP1 = normP2 = 0.0;
 }
 
 void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
@@ -846,6 +846,10 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
   {
     normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
     normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
+    normMP = linalg::SpectralNorm(GetComm(), *opMP, opMP->IsReal());
+    normP1 = linalg::SpectralNorm(GetComm(), *opP1, opP1->IsReal());
+    normP2 = linalg::SpectralNorm(GetComm(), *opP2, opP2->IsReal());
+    Mpi::Print("normK, M, MP, P1, P2: {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}\n", normK, normM, normMP, normP1, normP2);
     MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!");
     if (normK > 0 && normM > 0.0)
     {
@@ -930,7 +934,7 @@ SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print,
   : SlepcEPSSolverBase(comm, print, prefix)
 {
   opK = opC = opM = opMP = opP1 = opP2 = nullptr;
-  normK = normC = normM = 0.0;
+  normK = normC = normM = normMP = normP1 = normP2 = 0.0;
 }
 
 void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
@@ -1031,6 +1035,10 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO
     normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
     normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal());
     normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
+    normMP = linalg::SpectralNorm(GetComm(), *opMP, opMP->IsReal());
+    normP1 = linalg::SpectralNorm(GetComm(), *opP1, opP1->IsReal());
+    normP2 = linalg::SpectralNorm(GetComm(), *opP2, opP2->IsReal());
+    Mpi::Print("normK, C, M, MP, P1, P2: {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}\n", normK, normC, normM, normMP, normP1, normP2);
     MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0,
                 "Invalid matrix norms for PEP scaling!");
     if (normK > 0 && normC > 0.0 && normM > 0.0)
@@ -1388,7 +1396,7 @@ SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print, const std::string &pref
   : SlepcPEPSolverBase(comm, print, prefix)
 {
   opK = opC = opM = opMP = opP1 = opP2 = nullptr;
-  normK = normC = normM = 0.0;
+  normK = normC = normM = normMP = normP1 = normP2 = 0.0;
 }
 
 void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
@@ -1495,6 +1503,10 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
     normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
     normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal());
     normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
+    normMP = linalg::SpectralNorm(GetComm(), *opMP, opMP->IsReal());
+    normP1 = linalg::SpectralNorm(GetComm(), *opP1, opP1->IsReal());
+    normP2 = linalg::SpectralNorm(GetComm(), *opP2, opP2->IsReal());
+    Mpi::Print("normK, C, M, MP, P1, P2: {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}\n", normK, normC, normM, normMP, normP1, normP2);
     MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0,
                 "Invalid matrix norms for PEP scaling!");
     if (normK > 0 && normC > 0.0 && normM > 0.0)
diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp
index 7d498e5ed..e5fe6944f 100644
--- a/palace/linalg/slepc.hpp
+++ b/palace/linalg/slepc.hpp
@@ -267,7 +267,7 @@ class SlepcEPSSolver : public SlepcEPSSolverBase
 
 private:
   // Operator norms for scaling.
-  mutable PetscReal normK, normM;
+  mutable PetscReal normK, normM, normMP, normP1, normP2;
 
 protected:
   PetscReal GetResidualNorm(PetscScalar l, const ComplexVector &x,
@@ -310,7 +310,7 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase
 
 private:
   // Operator norms for scaling.
-  mutable PetscReal normK, normC, normM;
+  mutable PetscReal normK, normC, normM, normMP, normP1, normP2;
 
 protected:
   PetscReal GetResidualNorm(PetscScalar l, const ComplexVector &x,
@@ -409,7 +409,7 @@ class SlepcPEPSolver : public SlepcPEPSolverBase
 
 private:
   // Operator norms for scaling.
-  mutable PetscReal normK, normC, normM;
+  mutable PetscReal normK, normC, normM, normMP, normP1, normP2;
 
 protected:
   PetscReal GetResidualNorm(PetscScalar l, const ComplexVector &x,
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index dc16bf47a..775c4ea0d 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -977,6 +977,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
   Mpi::Print("GetPreconditioner pc_mat_real: {:d}, pc_mat_shifted: {:d}\n", pc_mat_real, pc_mat_shifted);
   if (std::is_same<OperType, ComplexOperator>::value && !pc_mat_real)
   {
+    Mpi::Print("GetPreconditioner Complex!\n");
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
         dfi(mat_op.MaxCeedAttribute()), fr(mat_op.MaxCeedAttribute()),
         fi(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
@@ -1018,6 +1019,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
   }
   else
   {
+    Mpi::Print("GetPreconditioner Real!\n");
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
         fr(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
         fbr(mat_op.MaxCeedBdrAttribute()), fpwr(mat_op.MaxCeedAttribute()),
@@ -1134,7 +1136,6 @@ void SpaceOperator::AddDampingBdrCoefficients(double coeff, MaterialPropertyCoef
 void SpaceOperator::AddRealMassCoefficients(double coeff, MaterialPropertyCoefficient &f)
 {
   f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityReal(), coeff);
-  //periodic_op.AddRealMassCoefficients(coeff, f);//1.0 or coeff?
 }
 
 void SpaceOperator::AddRealMassBdrCoefficients(double coeff,
@@ -1157,7 +1158,6 @@ void SpaceOperator::AddImagMassCoefficients(double coeff, MaterialPropertyCoeffi
 void SpaceOperator::AddAbsMassCoefficients(double coeff, MaterialPropertyCoefficient &f)
 {
   f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityAbs(), coeff);
-  //periodic_op.AddRealMassCoefficients(coeff, f);
 }
 
 void SpaceOperator::AddExtraSystemBdrCoefficients(double omega,

From 3f6bef0bc2e086c7725268bb2c09395bb17c9097 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 25 Nov 2024 13:59:40 -0800
Subject: [PATCH 12/49] Allow non-symmetric material properties and merge
 floquet terms in a single operator

---
 palace/drivers/drivensolver.cpp               |  17 +-
 palace/drivers/eigensolver.cpp                |  27 +-
 palace/fem/libceed/coefficient.cpp            |  11 +-
 palace/fem/libceed/integrator.cpp             |   2 +-
 palace/fem/libceed/operator.cpp               |   4 +-
 palace/fem/qfunctions/2/h1_2_qf.h             |   2 +-
 palace/fem/qfunctions/2/h1_build_2_qf.h       |   3 +-
 palace/fem/qfunctions/2/l2_2_qf.h             |   2 +-
 palace/fem/qfunctions/2/l2_build_2_qf.h       |   3 +-
 palace/fem/qfunctions/21/hcurl_21_qf.h        |   2 +-
 palace/fem/qfunctions/21/hcurl_build_21_qf.h  |   2 +-
 palace/fem/qfunctions/21/hcurlh1d_21_qf.h     |   2 +-
 .../fem/qfunctions/21/hcurlh1d_build_21_qf.h  |   2 +-
 palace/fem/qfunctions/21/hcurlhdiv_21_qf.h    |   4 +-
 .../fem/qfunctions/21/hcurlhdiv_build_21_qf.h |   4 +-
 palace/fem/qfunctions/21/hcurlmass_21_qf.h    |   2 +-
 .../fem/qfunctions/21/hcurlmass_build_21_qf.h |   2 +-
 palace/fem/qfunctions/21/hdiv_21_qf.h         |   2 +-
 palace/fem/qfunctions/21/hdiv_build_21_qf.h   |   2 +-
 palace/fem/qfunctions/21/l2mass_21_qf.h       |   2 +-
 palace/fem/qfunctions/21/l2mass_build_21_qf.h |   2 +-
 palace/fem/qfunctions/21/utils_21_qf.h        |  50 +--
 palace/fem/qfunctions/22/hcurl_22_qf.h        |   2 +-
 palace/fem/qfunctions/22/hcurl_build_22_qf.h  |   3 +-
 palace/fem/qfunctions/22/hcurlh1d_22_qf.h     |   2 +-
 .../fem/qfunctions/22/hcurlh1d_build_22_qf.h  |   2 +-
 palace/fem/qfunctions/22/hcurlhdiv_22_qf.h    |   4 +-
 .../fem/qfunctions/22/hcurlhdiv_build_22_qf.h |   4 +-
 .../fem/qfunctions/22/hcurlhdiv_error_22_qf.h |   8 +-
 palace/fem/qfunctions/22/hcurlmass_22_qf.h    |   2 +-
 .../fem/qfunctions/22/hcurlmass_build_22_qf.h |   3 +-
 palace/fem/qfunctions/22/hdiv_22_qf.h         |   2 +-
 palace/fem/qfunctions/22/hdiv_build_22_qf.h   |   3 +-
 palace/fem/qfunctions/22/hdivmass_22_qf.h     |   2 +-
 .../fem/qfunctions/22/hdivmass_build_22_qf.h  |   5 +-
 palace/fem/qfunctions/22/l2mass_22_qf.h       |   2 +-
 palace/fem/qfunctions/22/l2mass_build_22_qf.h |   5 +-
 palace/fem/qfunctions/22/utils_22_qf.h        |  79 ++---
 palace/fem/qfunctions/3/h1_3_qf.h             |   8 +-
 palace/fem/qfunctions/3/h1_build_3_qf.h       |   5 +-
 palace/fem/qfunctions/3/l2_3_qf.h             |   8 +-
 palace/fem/qfunctions/3/l2_build_3_qf.h       |   5 +-
 palace/fem/qfunctions/32/hcurl_32_qf.h        |   2 +-
 palace/fem/qfunctions/32/hcurl_build_32_qf.h  |   3 +-
 palace/fem/qfunctions/32/hcurlh1d_32_qf.h     |   2 +-
 .../fem/qfunctions/32/hcurlh1d_build_32_qf.h  |   2 +-
 palace/fem/qfunctions/32/hcurlhdiv_32_qf.h    |   4 +-
 .../fem/qfunctions/32/hcurlhdiv_build_32_qf.h |   4 +-
 palace/fem/qfunctions/32/hcurlmass_32_qf.h    |   2 +-
 .../fem/qfunctions/32/hcurlmass_build_32_qf.h |   3 +-
 palace/fem/qfunctions/32/hdiv_32_qf.h         |   2 +-
 palace/fem/qfunctions/32/hdiv_build_32_qf.h   |   3 +-
 palace/fem/qfunctions/32/hdivmass_32_qf.h     |   2 +-
 .../fem/qfunctions/32/hdivmass_build_32_qf.h  |   3 +-
 palace/fem/qfunctions/32/l2mass_32_qf.h       |   2 +-
 palace/fem/qfunctions/32/l2mass_build_32_qf.h |   5 +-
 palace/fem/qfunctions/32/utils_32_qf.h        |  95 +++---
 palace/fem/qfunctions/33/hcurl_33_qf.h        |   2 +-
 palace/fem/qfunctions/33/hcurl_build_33_qf.h  |   5 +-
 palace/fem/qfunctions/33/hcurlh1d_33_qf.h     |   2 +-
 .../fem/qfunctions/33/hcurlh1d_build_33_qf.h  |   2 +-
 .../fem/qfunctions/33/hcurlh1d_error_22_qf.h  |   8 +-
 .../fem/qfunctions/33/hcurlh1d_error_33_qf.h  |   8 +-
 palace/fem/qfunctions/33/hcurlhdiv_33_qf.h    |   4 +-
 .../fem/qfunctions/33/hcurlhdiv_build_33_qf.h |   4 +-
 .../fem/qfunctions/33/hcurlhdiv_error_33_qf.h |   8 +-
 palace/fem/qfunctions/33/hcurlmass_33_qf.h    |   2 +-
 .../fem/qfunctions/33/hcurlmass_build_33_qf.h |   5 +-
 palace/fem/qfunctions/33/hdiv_33_qf.h         |   2 +-
 palace/fem/qfunctions/33/hdiv_build_33_qf.h   |   5 +-
 palace/fem/qfunctions/33/hdivmass_33_qf.h     |   4 +-
 .../fem/qfunctions/33/hdivmass_build_33_qf.h  |  12 +-
 palace/fem/qfunctions/33/l2mass_33_qf.h       |   2 +-
 palace/fem/qfunctions/33/l2mass_build_33_qf.h |   7 +-
 palace/fem/qfunctions/33/utils_33_qf.h        | 135 ++++----
 palace/fem/qfunctions/apply/apply_12_qf.h     |   4 +-
 palace/fem/qfunctions/apply/apply_13_qf.h     |   6 +-
 palace/fem/qfunctions/apply/apply_21_qf.h     |   6 +-
 palace/fem/qfunctions/apply/apply_22_qf.h     |  10 +-
 palace/fem/qfunctions/apply/apply_2_qf.h      |   4 +-
 palace/fem/qfunctions/apply/apply_31_qf.h     |   8 +-
 palace/fem/qfunctions/apply/apply_33_qf.h     |  14 +-
 palace/fem/qfunctions/apply/apply_3_qf.h      |   6 +-
 palace/fem/qfunctions/coeff/coeff_2_qf.h      |   9 +-
 palace/fem/qfunctions/coeff/coeff_3_qf.h      |  17 +-
 palace/fem/qfunctions/coeff/coeff_qf.h        |   2 +-
 palace/linalg/eps.hpp                         |   7 +-
 palace/linalg/slepc.cpp                       | 163 ++--------
 palace/linalg/slepc.hpp                       |  33 +-
 palace/linalg/solver.cpp                      |  58 +++-
 palace/models/periodicboundaryoperator.cpp    |  11 +-
 palace/models/romoperator.cpp                 |  38 +--
 palace/models/romoperator.hpp                 |   4 +-
 palace/models/spaceoperator.cpp               | 303 ++++--------------
 palace/models/spaceoperator.hpp               |  19 +-
 palace/models/timeoperator.cpp                |   4 +-
 palace/utils/iodata.cpp                       |   3 +-
 97 files changed, 560 insertions(+), 823 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index b5b3325b7..c6fee6b5d 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -117,20 +117,17 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega0, Operator::DIAG_ZERO);
-  auto MP = space_op.GetPeriodicMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto PF = space_op.GetPeriodicMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   const auto &Curl = space_op.GetCurlMatrix();
 
   // Set up the linear solver and set operators for the first frequency step. The
   // preconditioner for the complex linear system is constructed from a real approximation
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega0,
-                                    std::complex<double>(-omega0 * omega0, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
-                                    C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
+                                    std::complex<double>(-omega0 * omega0, 0.0), K.get(),
+                                    C.get(), M.get(), A2.get(), PF.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega0, -omega0 * omega0,
-                                                             omega0, 1.0, 1.0, -1.0);
-
+                                                             omega0);
   ComplexKspSolver ksp(iodata, space_op.GetNDSpaces(), &space_op.GetH1Spaces());
   ksp.SetOperators(*A, *P);
 
@@ -166,10 +163,10 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
       // Update frequency-dependent excitation and operators.
       A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega, Operator::DIAG_ZERO);
       A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega,
-                                   std::complex<double>(-omega * omega, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
-                                   C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
+                                   std::complex<double>(-omega * omega, 0.0), K.get(),
+                                   C.get(), M.get(), A2.get(), PF.get());
       P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega,
-                                                            omega, 1.0, 1.0, -1.0);
+                                                            omega);
       ksp.SetOperators(*A, *P);
     }
     space_op.GetExcitationVector(omega, RHS);
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 6398a878e..edb26867b 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -36,15 +36,9 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   auto K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  auto MP = space_op.GetPeriodicMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  auto P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  auto P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto PF = space_op.GetPeriodicMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(1.0, Operator::DIAG_ZERO);
   A2 = nullptr;
-  //test
-  //MP = nullptr;
-  //P1 = nullptr;
-  //P2 = nullptr;
 
   const auto &Curl = space_op.GetCurlMatrix();
   SaveMetadata(space_op.GetNDSpaces());
@@ -134,27 +128,23 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
                                           : EigenvalueSolver::ScaleType::NONE;
   if (C)
   {
-    if (MP || P1 || P2)
+    if (PF)
     {
-      Mpi::Print("Setting eigensolver with K C M MP P1 P2\n");
-      eigen->SetOperators(*K, *C, *M, *MP, *P1, *P2, scale);
+      eigen->SetOperators(*K, *C, *M, *PF, scale);
     }
     else
     {
-      Mpi::Print("Setting eigensolver with K C M\n");
       eigen->SetOperators(*K, *C, *M, scale);
     }
   }
   else
   {
-    if (MP || P1 || P2)
+    if (PF)
     {
-      Mpi::Print("Setting eigensolver with K M MP P1 P2\n");
-      eigen->SetOperators(*K, *M, *MP, *P1, *P2, scale);
+      eigen->SetOperators(*K, *M, *PF, scale);
     }
     else
     {
-      Mpi::Print("Setting eigensolver with K M\n");
       eigen->SetOperators(*K, *M, scale);
     }
   }
@@ -270,12 +260,9 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * target,
                                     std::complex<double>(-target * target, 0.0),
-                                    std::complex<double>(1.0, 0.0),
-                                    std::complex<double>(0.0, 1.0),
-                                    std::complex<double>(0.0, -1.0),
-                                    K.get(), C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
+                                    K.get(), C.get(), M.get(), A2.get(), PF.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, target, -target * target,
-                                                             target, 1.0, 1.0, -1.0);
+                                                             target);
   auto ksp = std::make_unique<ComplexKspSolver>(iodata, space_op.GetNDSpaces(),
                                                 &space_op.GetH1Spaces());
   ksp->SetOperators(*A, *P);
diff --git a/palace/fem/libceed/coefficient.cpp b/palace/fem/libceed/coefficient.cpp
index 0469070af..f1a86bfd8 100644
--- a/palace/fem/libceed/coefficient.cpp
+++ b/palace/fem/libceed/coefficient.cpp
@@ -17,7 +17,7 @@ namespace
 
 inline auto CoeffDim(int dim)
 {
-  return dim * (dim + 1) / 2;
+  return dim * dim;
 }
 
 inline void MakeDiagonalCoefficient(int dim, CeedIntScalar *mat_coeff, CeedScalar a,
@@ -30,7 +30,7 @@ inline void MakeDiagonalCoefficient(int dim, CeedIntScalar *mat_coeff, CeedScala
   }
   for (int di = 0; di < dim; ++di)
   {
-    const int idx = (di * dim) - (((di - 1) * di) / 2);
+    const int idx = di * (dim + 1);
     mat_coeff[coeff_dim * k + idx].second = a;
   }
 }
@@ -86,8 +86,7 @@ PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double
     AttrMat(ctx.data())[i].first = (k < 0) ? zero_mat : k;
   }
 
-  // Copy material properties: Matrix-valued material properties are always assumed to be
-  // symmetric and we store only the lower triangular part.
+  // Copy material properties
   ctx[1 + attr_mat.Size()].first = mat_coeff.SizeK() + 1;
   for (int k = 0; k < mat_coeff.SizeK(); k++)
   {
@@ -100,10 +99,10 @@ PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double
     {
       for (int dj = 0; dj < dim; ++dj)
       {
-        for (int di = dj; di < dim; ++di)
+        for (int di = 0; di < dim; ++di)
         {
           // Column-major ordering.
-          const int idx = (dj * dim) - (((dj - 1) * dj) / 2) + di - dj;
+          const int idx = (dj * dim) + di;
           MatCoeff(ctx.data())[coeff_dim * k + idx].second = a * mat_coeff(di, dj, k);
         }
       }
diff --git a/palace/fem/libceed/integrator.cpp b/palace/fem/libceed/integrator.cpp
index 0daeb207b..987b17429 100644
--- a/palace/fem/libceed/integrator.cpp
+++ b/palace/fem/libceed/integrator.cpp
@@ -204,7 +204,7 @@ std::vector<CeedInt> QuadratureDataSetup(unsigned int ops, Ceed ceed,
   PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
   for (auto size : active_input_sizes)
   {
-    q_data_size += size * (size + 1) / 2;
+    q_data_size += size * size;
   }
 
   PalaceCeedCall(
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index 8fdc162bd..3b9a98af2 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -530,8 +530,8 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
   };
 
   // Initialize the coarse operator.
-  auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
-                                                       fespace_coarse.GetVSize());
+  auto op_coarse = std::make_unique<Operator>(fespace_coarse.GetVSize(),
+                                              fespace_coarse.GetVSize());
 
   // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
   // types, integrators) of the original fine operator.
diff --git a/palace/fem/qfunctions/2/h1_2_qf.h b/palace/fem/qfunctions/2/h1_2_qf.h
index 3e884b379..4185e8624 100644
--- a/palace/fem/qfunctions/2/h1_2_qf.h
+++ b/palace/fem/qfunctions/2/h1_2_qf.h
@@ -14,7 +14,7 @@ CEED_QFUNCTION(f_apply_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3];
+    CeedScalar coeff[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
 
     const CeedScalar u0 = u[i + Q * 0];
diff --git a/palace/fem/qfunctions/2/h1_build_2_qf.h b/palace/fem/qfunctions/2/h1_build_2_qf.h
index a4bb96c28..311e99f77 100644
--- a/palace/fem/qfunctions/2/h1_build_2_qf.h
+++ b/palace/fem/qfunctions/2/h1_build_2_qf.h
@@ -14,12 +14,13 @@ CEED_QFUNCTION(f_build_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3];
+    CeedScalar coeff[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
 
     qd[i + Q * 0] = wdetJ[i] * coeff[0];
     qd[i + Q * 1] = wdetJ[i] * coeff[1];
     qd[i + Q * 2] = wdetJ[i] * coeff[2];
+    qd[i + Q * 3] = wdetJ[i] * coeff[3];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/2/l2_2_qf.h b/palace/fem/qfunctions/2/l2_2_qf.h
index 5057d47ba..8407fef1b 100644
--- a/palace/fem/qfunctions/2/l2_2_qf.h
+++ b/palace/fem/qfunctions/2/l2_2_qf.h
@@ -14,7 +14,7 @@ CEED_QFUNCTION(f_apply_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3];
+    CeedScalar coeff[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
 
diff --git a/palace/fem/qfunctions/2/l2_build_2_qf.h b/palace/fem/qfunctions/2/l2_build_2_qf.h
index f8b7d6411..c6fccaa93 100644
--- a/palace/fem/qfunctions/2/l2_build_2_qf.h
+++ b/palace/fem/qfunctions/2/l2_build_2_qf.h
@@ -14,13 +14,14 @@ CEED_QFUNCTION(f_build_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3];
+    CeedScalar coeff[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
 
     qd[i + Q * 0] = w * coeff[0];
     qd[i + Q * 1] = w * coeff[1];
     qd[i + Q * 2] = w * coeff[2];
+    qd[i + Q * 3] = w * coeff[3];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/21/hcurl_21_qf.h b/palace/fem/qfunctions/21/hcurl_21_qf.h
index 5c608d1b8..e3b4a484f 100644
--- a/palace/fem/qfunctions/21/hcurl_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurl_21_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurl_21)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[1] = {u[i + Q * 0]};
-    CeedScalar coeff[3], adjJt_loc[2], v_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], v_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/21/hcurl_build_21_qf.h b/palace/fem/qfunctions/21/hcurl_build_21_qf.h
index 53c8130b5..ed5369750 100644
--- a/palace/fem/qfunctions/21/hcurl_build_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurl_build_21_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurl_21)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[2], qd_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], qd_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     MultAtBA21(adjJt_loc, coeff, qd_loc);
diff --git a/palace/fem/qfunctions/21/hcurlh1d_21_qf.h b/palace/fem/qfunctions/21/hcurlh1d_21_qf.h
index d0645daae..f438a7999 100644
--- a/palace/fem/qfunctions/21/hcurlh1d_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurlh1d_21_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[1] = {u[i + Q * 0]};
-    CeedScalar coeff[3], adjJt_loc[2], v_loc[2];
+    CeedScalar coeff[4], adjJt_loc[2], v_loc[2];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     MultBAx21(adjJt_loc, coeff, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/21/hcurlh1d_build_21_qf.h b/palace/fem/qfunctions/21/hcurlh1d_build_21_qf.h
index 1f0421fd6..10c2502d0 100644
--- a/palace/fem/qfunctions/21/hcurlh1d_build_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurlh1d_build_21_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[2], qd_loc[2];
+    CeedScalar coeff[4], adjJt_loc[2], qd_loc[2];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     MultBA21(adjJt_loc, coeff, qd_loc);
diff --git a/palace/fem/qfunctions/21/hcurlhdiv_21_qf.h b/palace/fem/qfunctions/21/hcurlhdiv_21_qf.h
index 2f956b8ee..c7bb300d8 100644
--- a/palace/fem/qfunctions/21/hcurlhdiv_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurlhdiv_21_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[1] = {u[i + Q * 0]};
-    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], J_loc[2], v_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     AdjJt21(adjJt_loc, J_loc);
@@ -36,7 +36,7 @@ CEED_QFUNCTION(f_apply_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[1] = {u[i + Q * 0]};
-    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], J_loc[2], v_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     AdjJt21(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/21/hcurlhdiv_build_21_qf.h b/palace/fem/qfunctions/21/hcurlhdiv_build_21_qf.h
index 1eebe7192..33936cb81 100644
--- a/palace/fem/qfunctions/21/hcurlhdiv_build_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurlhdiv_build_21_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], J_loc[2], qd_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     AdjJt21(adjJt_loc, J_loc);
@@ -34,7 +34,7 @@ CEED_QFUNCTION(f_build_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], J_loc[2], qd_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     AdjJt21(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/21/hcurlmass_21_qf.h b/palace/fem/qfunctions/21/hcurlmass_21_qf.h
index 5cbf74251..5244f0d85 100644
--- a/palace/fem/qfunctions/21/hcurlmass_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurlmass_21_qf.h
@@ -24,7 +24,7 @@ CEED_QFUNCTION(f_apply_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q,
     }
     {
       const CeedScalar u_loc[1] = {gradu[i + Q * 0]};
-      CeedScalar coeff[3], adjJt_loc[2], v_loc[1];
+      CeedScalar coeff[4], adjJt_loc[2], v_loc[1];
       CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack21(adjJt + i, Q, adjJt_loc);
       MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/21/hcurlmass_build_21_qf.h b/palace/fem/qfunctions/21/hcurlmass_build_21_qf.h
index 8d3eeac4a..ffb652755 100644
--- a/palace/fem/qfunctions/21/hcurlmass_build_21_qf.h
+++ b/palace/fem/qfunctions/21/hcurlmass_build_21_qf.h
@@ -22,7 +22,7 @@ CEED_QFUNCTION(f_build_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = coeff * wdetJ[i];
     }
     {
-      CeedScalar coeff[3], adjJt_loc[2], qd_loc[1];
+      CeedScalar coeff[4], adjJt_loc[2], qd_loc[1];
       CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack21(adjJt + i, Q, adjJt_loc);
       MultAtBA21(adjJt_loc, coeff, qd_loc);
diff --git a/palace/fem/qfunctions/21/hdiv_21_qf.h b/palace/fem/qfunctions/21/hdiv_21_qf.h
index e71932a13..1db5540fb 100644
--- a/palace/fem/qfunctions/21/hdiv_21_qf.h
+++ b/palace/fem/qfunctions/21/hdiv_21_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hdiv_21)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[1] = {u[i + Q * 0]};
-    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], J_loc[2], v_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     AdjJt21(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/21/hdiv_build_21_qf.h b/palace/fem/qfunctions/21/hdiv_build_21_qf.h
index 784c411c7..9b6748690 100644
--- a/palace/fem/qfunctions/21/hdiv_build_21_qf.h
+++ b/palace/fem/qfunctions/21/hdiv_build_21_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hdiv_21)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CeedScalar coeff[4], adjJt_loc[2], J_loc[2], qd_loc[1];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack21(adjJt + i, Q, adjJt_loc);
     AdjJt21(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/21/l2mass_21_qf.h b/palace/fem/qfunctions/21/l2mass_21_qf.h
index 89a372e54..e672c7275 100644
--- a/palace/fem/qfunctions/21/l2mass_21_qf.h
+++ b/palace/fem/qfunctions/21/l2mass_21_qf.h
@@ -19,7 +19,7 @@ CEED_QFUNCTION(f_apply_l2mass_21)(void *__restrict__ ctx, CeedInt Q,
   {
     {
       const CeedScalar u_loc[1] = {u[i + Q * 0]};
-      CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[1];
+      CeedScalar coeff[4], adjJt_loc[2], J_loc[2], v_loc[1];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack21(adjJt + i, Q, adjJt_loc);
       AdjJt21(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/21/l2mass_build_21_qf.h b/palace/fem/qfunctions/21/l2mass_build_21_qf.h
index ee7091541..497c02b72 100644
--- a/palace/fem/qfunctions/21/l2mass_build_21_qf.h
+++ b/palace/fem/qfunctions/21/l2mass_build_21_qf.h
@@ -17,7 +17,7 @@ CEED_QFUNCTION(f_build_l2mass_21)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     {
-      CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+      CeedScalar coeff[4], adjJt_loc[2], J_loc[2], qd_loc[1];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack21(adjJt + i, Q, adjJt_loc);
       AdjJt21(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/21/utils_21_qf.h b/palace/fem/qfunctions/21/utils_21_qf.h
index ab7c2df24..50db6fae1 100644
--- a/palace/fem/qfunctions/21/utils_21_qf.h
+++ b/palace/fem/qfunctions/21/utils_21_qf.h
@@ -32,70 +32,70 @@ CEED_QFUNCTION_HELPER void MatUnpack21(const CeedScalar *A, const CeedInt A_stri
   A_loc[1] = A[A_stride * 1];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBCx21(const CeedScalar A[2], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultAtBCx21(const CeedScalar A[2], const CeedScalar B[4],
                                        const CeedScalar C[2], const CeedScalar x[1],
                                        CeedScalar y[1])
 {
-  // A: 0   B: 0 1   C: 0
-  //    1      1 2      1
+  // A: 0   B: 0 2   C: 0
+  //    1      1 3      1
   CeedScalar z[2], t;
 
   y[0] = C[0] * x[0];
   t = C[1] * x[0];
 
-  z[0] = B[0] * y[0] + B[1] * t;
-  z[1] = B[1] * y[0] + B[2] * t;
+  z[0] = B[0] * y[0] + B[2] * t;
+  z[1] = B[1] * y[0] + B[3] * t;
 
   y[0] = A[0] * z[0] + A[1] * z[1];
 }
 
-CEED_QFUNCTION_HELPER void MultBAx21(const CeedScalar A[2], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultBAx21(const CeedScalar A[2], const CeedScalar B[4],
                                      const CeedScalar x[1], CeedScalar y[2])
 {
-  // A: 0   B: 0 1
-  //    1      1 2
+  // A: 0   B: 0 2
+  //    1      1 3
   CeedScalar z[2];
 
   z[0] = A[0] * x[0];
   z[1] = A[1] * x[0];
 
-  y[0] = B[0] * z[0] + B[1] * z[1];
-  y[1] = B[1] * z[0] + B[2] * z[1];
+  y[0] = B[0] * z[0] + B[2] * z[1];
+  y[1] = B[1] * z[0] + B[3] * z[1];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBA21(const CeedScalar A[2], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultAtBA21(const CeedScalar A[2], const CeedScalar B[4],
                                       CeedScalar C[1])
 {
-  // A: 0   B: 0 1   C: 0
-  //    1      1 2
+  // A: 0   B: 0 2   C: 0
+  //    1      1 3
 
   // First compute entries of R = B A.
-  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1];
-  const CeedScalar R21 = B[1] * A[0] + B[2] * A[1];
+  const CeedScalar R11 = B[0] * A[0] + B[2] * A[1];
+  const CeedScalar R21 = B[1] * A[0] + B[3] * A[1];
 
   C[0] = A[0] * R11 + A[1] * R21;
 }
 
-CEED_QFUNCTION_HELPER void MultAtBC21(const CeedScalar A[2], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultAtBC21(const CeedScalar A[2], const CeedScalar B[4],
                                       const CeedScalar C[2], CeedScalar D[1])
 {
-  // A, C: 0   B: 0 1   D: 0
-  //       1      1 2
+  // A, C: 0   B: 0 2   D: 0
+  //       1      1 3
 
   // First compute entries of R = B C.
-  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1];
-  const CeedScalar R21 = B[1] * C[0] + B[2] * C[1];
+  const CeedScalar R11 = B[0] * C[0] + B[2] * C[1];
+  const CeedScalar R21 = B[1] * C[0] + B[3] * C[1];
 
   D[0] = A[0] * R11 + A[1] * R21;
 }
 
-CEED_QFUNCTION_HELPER void MultBA21(const CeedScalar A[2], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultBA21(const CeedScalar A[2], const CeedScalar B[4],
                                     CeedScalar C[2])
 {
-  // A: 0   B: 0 1   C: 0
-  //    1      1 2      1
-  C[0] = B[0] * A[0] + B[1] * A[1];
-  C[1] = B[1] * A[0] + B[2] * A[1];
+  // A: 0   B: 0 2   C: 0
+  //    1      1 3      1
+  C[0] = B[0] * A[0] + B[2] * A[1];
+  C[1] = B[1] * A[0] + B[3] * A[1];
 }
 
 #endif  // PALACE_LIBCEED_UTILS_21_QF_H
diff --git a/palace/fem/qfunctions/22/hcurl_22_qf.h b/palace/fem/qfunctions/22/hcurl_22_qf.h
index 50e35e801..b535dc10d 100644
--- a/palace/fem/qfunctions/22/hcurl_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurl_22_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurl_22)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+    CeedScalar coeff[4], adjJt_loc[4], v_loc[2];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/22/hcurl_build_22_qf.h b/palace/fem/qfunctions/22/hcurl_build_22_qf.h
index 8fdd180ea..e6807da2b 100644
--- a/palace/fem/qfunctions/22/hcurl_build_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurl_build_22_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurl_22)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[4], qd_loc[3];
+    CeedScalar coeff[4], adjJt_loc[4], qd_loc[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     MultAtBA22(adjJt_loc, coeff, qd_loc);
@@ -23,6 +23,7 @@ CEED_QFUNCTION(f_build_hcurl_22)(void *__restrict__ ctx, CeedInt Q,
     qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
     qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
     qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/22/hcurlh1d_22_qf.h b/palace/fem/qfunctions/22/hcurlh1d_22_qf.h
index a0c506cf0..df909d337 100644
--- a/palace/fem/qfunctions/22/hcurlh1d_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlh1d_22_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+    CeedScalar coeff[4], adjJt_loc[4], v_loc[2];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     MultBAx22(adjJt_loc, coeff, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/22/hcurlh1d_build_22_qf.h b/palace/fem/qfunctions/22/hcurlh1d_build_22_qf.h
index 5652b05a2..d6bfc31ed 100644
--- a/palace/fem/qfunctions/22/hcurlh1d_build_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlh1d_build_22_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[4], qd_loc[4];
+    CeedScalar coeff[4], adjJt_loc[4], qd_loc[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     MultBA22(adjJt_loc, coeff, qd_loc);
diff --git a/palace/fem/qfunctions/22/hcurlhdiv_22_qf.h b/palace/fem/qfunctions/22/hcurlhdiv_22_qf.h
index 32745f7b1..dfecf9620 100644
--- a/palace/fem/qfunctions/22/hcurlhdiv_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlhdiv_22_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+    CeedScalar coeff[4], adjJt_loc[4], J_loc[4], v_loc[2];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     AdjJt22(adjJt_loc, J_loc);
@@ -37,7 +37,7 @@ CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+    CeedScalar coeff[4], adjJt_loc[4], J_loc[4], v_loc[2];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     AdjJt22(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/22/hcurlhdiv_build_22_qf.h b/palace/fem/qfunctions/22/hcurlhdiv_build_22_qf.h
index a17f123dc..bf8c23f00 100644
--- a/palace/fem/qfunctions/22/hcurlhdiv_build_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlhdiv_build_22_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4];
+    CeedScalar coeff[4], adjJt_loc[4], J_loc[4], qd_loc[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     AdjJt22(adjJt_loc, J_loc);
@@ -37,7 +37,7 @@ CEED_QFUNCTION(f_build_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4];
+    CeedScalar coeff[4], adjJt_loc[4], J_loc[4], qd_loc[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     AdjJt22(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h b/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h
index 8ee1d1457..7c1a150e6 100644
--- a/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h
@@ -21,13 +21,13 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_error_22)(void *__restrict__ ctx, CeedInt Q,
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     {
       const CeedScalar u1_loc[2] = {u1[i + Q * 0], u1[i + Q * 1]};
-      CeedScalar coeff[3];
+      CeedScalar coeff[4];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MultBAx22(adjJt_loc, coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[2] = {u2[i + Q * 0], u2[i + Q * 1]};
-      CeedScalar coeff[3], J_loc[4];
+      CeedScalar coeff[4], J_loc[4];
       CoeffUnpack2(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       AdjJt22(adjJt_loc, J_loc);
       MultBAx22(J_loc, coeff, u2_loc, v2_loc);
@@ -53,14 +53,14 @@ CEED_QFUNCTION(f_apply_hdivhcurl_error_22)(void *__restrict__ ctx, CeedInt Q,
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     {
       const CeedScalar u1_loc[2] = {u1[i + Q * 0], u1[i + Q * 1]};
-      CeedScalar coeff[3], J_loc[4];
+      CeedScalar coeff[4], J_loc[4];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       AdjJt22(adjJt_loc, J_loc);
       MultBAx22(J_loc, coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[2] = {u2[i + Q * 0], u2[i + Q * 1]};
-      CeedScalar coeff[3];
+      CeedScalar coeff[4];
       CoeffUnpack2(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MultBAx22(adjJt_loc, coeff, u2_loc, v2_loc);
     }
diff --git a/palace/fem/qfunctions/22/hcurlmass_22_qf.h b/palace/fem/qfunctions/22/hcurlmass_22_qf.h
index 20994d744..0f20b6a55 100644
--- a/palace/fem/qfunctions/22/hcurlmass_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlmass_22_qf.h
@@ -24,7 +24,7 @@ CEED_QFUNCTION(f_apply_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q,
     }
     {
       const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]};
-      CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+      CeedScalar coeff[4], adjJt_loc[4], v_loc[2];
       CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/22/hcurlmass_build_22_qf.h b/palace/fem/qfunctions/22/hcurlmass_build_22_qf.h
index 0a4023abb..2057a9be4 100644
--- a/palace/fem/qfunctions/22/hcurlmass_build_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlmass_build_22_qf.h
@@ -22,7 +22,7 @@ CEED_QFUNCTION(f_build_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = coeff * wdetJ[i];
     }
     {
-      CeedScalar coeff[3], adjJt_loc[4], qd_loc[3];
+      CeedScalar coeff[4], adjJt_loc[4], qd_loc[4];
       CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       MultAtBA22(adjJt_loc, coeff, qd_loc);
@@ -30,6 +30,7 @@ CEED_QFUNCTION(f_build_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q,
       qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
       qd2[i + Q * 1] = wdetJ[i] * qd_loc[1];
       qd2[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd2[i + Q * 3] = wdetJ[i] * qd_loc[3];
     }
   }
   return 0;
diff --git a/palace/fem/qfunctions/22/hdiv_22_qf.h b/palace/fem/qfunctions/22/hdiv_22_qf.h
index 9f0a3b6f3..c9572d5b0 100644
--- a/palace/fem/qfunctions/22/hdiv_22_qf.h
+++ b/palace/fem/qfunctions/22/hdiv_22_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hdiv_22)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+    CeedScalar coeff[4], adjJt_loc[4], J_loc[4], v_loc[2];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     AdjJt22(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/22/hdiv_build_22_qf.h b/palace/fem/qfunctions/22/hdiv_build_22_qf.h
index fd0fd2dc1..700f0e752 100644
--- a/palace/fem/qfunctions/22/hdiv_build_22_qf.h
+++ b/palace/fem/qfunctions/22/hdiv_build_22_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hdiv_22)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3];
+    CeedScalar coeff[4], adjJt_loc[4], J_loc[4], qd_loc[4];
     CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack22(adjJt + i, Q, adjJt_loc);
     AdjJt22(adjJt_loc, J_loc);
@@ -24,6 +24,7 @@ CEED_QFUNCTION(f_build_hdiv_22)(void *__restrict__ ctx, CeedInt Q,
     qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
     qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
     qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/22/hdivmass_22_qf.h b/palace/fem/qfunctions/22/hdivmass_22_qf.h
index 78163cff3..0bf163680 100644
--- a/palace/fem/qfunctions/22/hdivmass_22_qf.h
+++ b/palace/fem/qfunctions/22/hdivmass_22_qf.h
@@ -19,7 +19,7 @@ CEED_QFUNCTION(f_apply_hdivmass_22)(void *__restrict__ ctx, CeedInt Q,
   {
     {
       const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-      CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+      CeedScalar coeff[4], adjJt_loc[4], v_loc[2];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/22/hdivmass_build_22_qf.h b/palace/fem/qfunctions/22/hdivmass_build_22_qf.h
index da3e06935..a7c821082 100644
--- a/palace/fem/qfunctions/22/hdivmass_build_22_qf.h
+++ b/palace/fem/qfunctions/22/hdivmass_build_22_qf.h
@@ -12,12 +12,12 @@ CEED_QFUNCTION(f_build_hdivmass_22)(void *__restrict__ ctx, CeedInt Q,
                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
-  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 4 * Q;
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     {
-      CeedScalar coeff[3], adjJt_loc[4], qd_loc[3];
+      CeedScalar coeff[4], adjJt_loc[4], qd_loc[4];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       MultAtBA22(adjJt_loc, coeff, qd_loc);
@@ -25,6 +25,7 @@ CEED_QFUNCTION(f_build_hdivmass_22)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
       qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
       qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
     }
     {
       const CeedScalar coeff =
diff --git a/palace/fem/qfunctions/22/l2mass_22_qf.h b/palace/fem/qfunctions/22/l2mass_22_qf.h
index 7af6f00d7..a28d33eca 100644
--- a/palace/fem/qfunctions/22/l2mass_22_qf.h
+++ b/palace/fem/qfunctions/22/l2mass_22_qf.h
@@ -19,7 +19,7 @@ CEED_QFUNCTION(f_apply_l2mass_22)(void *__restrict__ ctx, CeedInt Q,
   {
     {
       const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-      CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+      CeedScalar coeff[4], adjJt_loc[4], J_loc[4], v_loc[2];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       AdjJt22(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/22/l2mass_build_22_qf.h b/palace/fem/qfunctions/22/l2mass_build_22_qf.h
index 98d6099a4..2d968c203 100644
--- a/palace/fem/qfunctions/22/l2mass_build_22_qf.h
+++ b/palace/fem/qfunctions/22/l2mass_build_22_qf.h
@@ -12,12 +12,12 @@ CEED_QFUNCTION(f_build_l2mass_22)(void *__restrict__ ctx, CeedInt Q,
                                   const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
-  CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q;
+  CeedScalar *qd1 = out[0], *qd2 = out[0] + 4 * Q;
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     {
-      CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3];
+      CeedScalar coeff[4], adjJt_loc[4], J_loc[4], qd_loc[4];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       AdjJt22(adjJt_loc, J_loc);
@@ -26,6 +26,7 @@ CEED_QFUNCTION(f_build_l2mass_22)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
       qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
       qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
     }
     {
       const CeedScalar coeff =
diff --git a/palace/fem/qfunctions/22/utils_22_qf.h b/palace/fem/qfunctions/22/utils_22_qf.h
index 9474ed946..d3c4d7306 100644
--- a/palace/fem/qfunctions/22/utils_22_qf.h
+++ b/palace/fem/qfunctions/22/utils_22_qf.h
@@ -35,75 +35,76 @@ CEED_QFUNCTION_HELPER void MatUnpack22(const CeedScalar *A, const CeedInt A_stri
   A_loc[3] = A[A_stride * 3];
 }
 
-CEED_QFUNCTION_HELPER void MultBx22(const CeedScalar B[3], const CeedScalar x[2],
+CEED_QFUNCTION_HELPER void MultBx22(const CeedScalar B[4], const CeedScalar x[2],
                                     CeedScalar y[2])
 {
-  // B: 0 1
-  //    1 2
-  y[0] = B[0] * x[0] + B[1] * x[1];
-  y[1] = B[1] * x[0] + B[2] * x[1];
+  // B: 0 2
+  //    1 3
+  y[0] = B[0] * x[0] + B[2] * x[1];
+  y[1] = B[1] * x[0] + B[3] * x[1];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBCx22(const CeedScalar A[4], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultAtBCx22(const CeedScalar A[4], const CeedScalar B[4],
                                        const CeedScalar C[4], const CeedScalar x[2],
                                        CeedScalar y[2])
 {
-  // A: 0 2   B: 0 1   C: 0 2
-  //    1 3      1 2      1 3
+  // A: 0 2   B: 0 2   C: 0 2
+  //    1 3      1 3      1 3
   CeedScalar z[2];
 
   y[0] = C[0] * x[0] + C[2] * x[1];
   y[1] = C[1] * x[0] + C[3] * x[1];
 
-  z[0] = B[0] * y[0] + B[1] * y[1];
-  z[1] = B[1] * y[0] + B[2] * y[1];
+  z[0] = B[0] * y[0] + B[2] * y[1];
+  z[1] = B[1] * y[0] + B[3] * y[1];
 
   y[0] = A[0] * z[0] + A[1] * z[1];
   y[1] = A[2] * z[0] + A[3] * z[1];
 }
 
-CEED_QFUNCTION_HELPER void MultBAx22(const CeedScalar A[4], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultBAx22(const CeedScalar A[4], const CeedScalar B[4],
                                      const CeedScalar x[2], CeedScalar y[2])
 {
-  // A: 0 2   B: 0 1
-  //    1 3      1 2
+  // A: 0 2   B: 0 2
+  //    1 3      1 3
   CeedScalar z[2];
 
   z[0] = A[0] * x[0] + A[2] * x[1];
   z[1] = A[1] * x[0] + A[3] * x[1];
 
-  y[0] = B[0] * z[0] + B[1] * z[1];
-  y[1] = B[1] * z[0] + B[2] * z[1];
+  y[0] = B[0] * z[0] + B[2] * z[1];
+  y[1] = B[1] * z[0] + B[3] * z[1];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBA22(const CeedScalar A[4], const CeedScalar B[3],
-                                      CeedScalar C[3])
+CEED_QFUNCTION_HELPER void MultAtBA22(const CeedScalar A[4], const CeedScalar B[4],
+                                      CeedScalar C[4])
 {
-  // A: 0 2   B: 0 1   C: 0 1
-  //    1 3      1 2      1 2
+  // A: 0 2   B: 0 2   C: 0 2
+  //    1 3      1 3      1 3
 
   // First compute entries of R = B A.
-  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1];
-  const CeedScalar R21 = B[1] * A[0] + B[2] * A[1];
-  const CeedScalar R12 = B[0] * A[2] + B[1] * A[3];
-  const CeedScalar R22 = B[1] * A[2] + B[2] * A[3];
+  const CeedScalar R11 = B[0] * A[0] + B[2] * A[1];
+  const CeedScalar R21 = B[1] * A[0] + B[3] * A[1];
+  const CeedScalar R12 = B[0] * A[2] + B[2] * A[3];
+  const CeedScalar R22 = B[1] * A[2] + B[3] * A[3];
 
   C[0] = A[0] * R11 + A[1] * R21;
-  C[1] = A[0] * R12 + A[1] * R22;
-  C[2] = A[2] * R12 + A[3] * R22;
+  C[1] = A[2] * R11 + A[3] * R21;
+  C[2] = A[0] * R12 + A[1] * R22;
+  C[3] = A[2] * R12 + A[3] * R22;
 }
 
-CEED_QFUNCTION_HELPER void MultAtBC22(const CeedScalar A[4], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultAtBC22(const CeedScalar A[4], const CeedScalar B[4],
                                       const CeedScalar C[4], CeedScalar D[4])
 {
-  // A, C: 0 2   B: 0 1   D: 0 2
-  //       1 3      1 2      1 3
+  // A, C: 0 2   B: 0 2   D: 0 2
+  //       1 3      1 3      1 3
 
   // First compute entries of R = B C.
-  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1];
-  const CeedScalar R21 = B[1] * C[0] + B[2] * C[1];
-  const CeedScalar R12 = B[0] * C[2] + B[1] * C[3];
-  const CeedScalar R22 = B[1] * C[2] + B[2] * C[3];
+  const CeedScalar R11 = B[0] * C[0] + B[2] * C[1];
+  const CeedScalar R21 = B[1] * C[0] + B[3] * C[1];
+  const CeedScalar R12 = B[0] * C[2] + B[2] * C[3];
+  const CeedScalar R22 = B[1] * C[2] + B[3] * C[3];
 
   D[0] = A[0] * R11 + A[1] * R21;
   D[1] = A[2] * R11 + A[3] * R21;
@@ -111,15 +112,15 @@ CEED_QFUNCTION_HELPER void MultAtBC22(const CeedScalar A[4], const CeedScalar B[
   D[3] = A[2] * R12 + A[3] * R22;
 }
 
-CEED_QFUNCTION_HELPER void MultBA22(const CeedScalar A[4], const CeedScalar B[3],
+CEED_QFUNCTION_HELPER void MultBA22(const CeedScalar A[4], const CeedScalar B[4],
                                     CeedScalar C[4])
 {
-  // A: 0 2   B: 0 1   C: 0 2
-  //    1 3      1 2      1 3
-  C[0] = B[0] * A[0] + B[1] * A[1];
-  C[1] = B[1] * A[0] + B[2] * A[1];
-  C[2] = B[0] * A[2] + B[1] * A[3];
-  C[3] = B[1] * A[2] + B[2] * A[3];
+  // A: 0 2   B: 0 2   C: 0 2
+  //    1 3      1 3      1 3
+  C[0] = B[0] * A[0] + B[2] * A[1];
+  C[1] = B[1] * A[0] + B[3] * A[1];
+  C[2] = B[0] * A[2] + B[2] * A[3];
+  C[3] = B[1] * A[2] + B[3] * A[3];
 }
 
 #endif  // PALACE_LIBCEED_UTILS_22_QF_H
diff --git a/palace/fem/qfunctions/3/h1_3_qf.h b/palace/fem/qfunctions/3/h1_3_qf.h
index 2f5cf32f8..96f4101e4 100644
--- a/palace/fem/qfunctions/3/h1_3_qf.h
+++ b/palace/fem/qfunctions/3/h1_3_qf.h
@@ -14,15 +14,15 @@ CEED_QFUNCTION(f_apply_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6];
+    CeedScalar coeff[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
 
     const CeedScalar u0 = u[i + Q * 0];
     const CeedScalar u1 = u[i + Q * 1];
     const CeedScalar u2 = u[i + Q * 2];
-    v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2);
-    v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2);
-    v[i + Q * 2] = wdetJ[i] * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2);
+    v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[3] * u1 + coeff[6] * u2);
+    v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[4] * u1 + coeff[7] * u2);
+    v[i + Q * 2] = wdetJ[i] * (coeff[2] * u0 + coeff[5] * u1 + coeff[8] * u2);
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/3/h1_build_3_qf.h b/palace/fem/qfunctions/3/h1_build_3_qf.h
index 534fae03a..39e1fb84d 100644
--- a/palace/fem/qfunctions/3/h1_build_3_qf.h
+++ b/palace/fem/qfunctions/3/h1_build_3_qf.h
@@ -14,7 +14,7 @@ CEED_QFUNCTION(f_build_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6];
+    CeedScalar coeff[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
 
     qd[i + Q * 0] = wdetJ[i] * coeff[0];
@@ -23,6 +23,9 @@ CEED_QFUNCTION(f_build_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
     qd[i + Q * 3] = wdetJ[i] * coeff[3];
     qd[i + Q * 4] = wdetJ[i] * coeff[4];
     qd[i + Q * 5] = wdetJ[i] * coeff[5];
+    qd[i + Q * 6] = wdetJ[i] * coeff[6];
+    qd[i + Q * 7] = wdetJ[i] * coeff[7];
+    qd[i + Q * 8] = wdetJ[i] * coeff[8];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/3/l2_3_qf.h b/palace/fem/qfunctions/3/l2_3_qf.h
index 51c2bb5b4..ef3e8b483 100644
--- a/palace/fem/qfunctions/3/l2_3_qf.h
+++ b/palace/fem/qfunctions/3/l2_3_qf.h
@@ -14,16 +14,16 @@ CEED_QFUNCTION(f_apply_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6];
+    CeedScalar coeff[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
 
     const CeedScalar u0 = u[i + Q * 0];
     const CeedScalar u1 = u[i + Q * 1];
     const CeedScalar u2 = u[i + Q * 2];
-    v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2);
-    v[i + Q * 1] = w * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2);
-    v[i + Q * 2] = w * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2);
+    v[i + Q * 0] = w * (coeff[0] * u0 + coeff[3] * u1 + coeff[6] * u2);
+    v[i + Q * 1] = w * (coeff[1] * u0 + coeff[4] * u1 + coeff[7] * u2);
+    v[i + Q * 2] = w * (coeff[2] * u0 + coeff[5] * u1 + coeff[8] * u2);
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/3/l2_build_3_qf.h b/palace/fem/qfunctions/3/l2_build_3_qf.h
index 78e6c71e0..1e402539e 100644
--- a/palace/fem/qfunctions/3/l2_build_3_qf.h
+++ b/palace/fem/qfunctions/3/l2_build_3_qf.h
@@ -14,7 +14,7 @@ CEED_QFUNCTION(f_build_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6];
+    CeedScalar coeff[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
 
@@ -24,6 +24,9 @@ CEED_QFUNCTION(f_build_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
     qd[i + Q * 3] = w * coeff[3];
     qd[i + Q * 4] = w * coeff[4];
     qd[i + Q * 5] = w * coeff[5];
+    qd[i + Q * 6] = w * coeff[6];
+    qd[i + Q * 7] = w * coeff[7];
+    qd[i + Q * 8] = w * coeff[8];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/32/hcurl_32_qf.h b/palace/fem/qfunctions/32/hcurl_32_qf.h
index 681917957..f348587e2 100644
--- a/palace/fem/qfunctions/32/hcurl_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurl_32_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurl_32)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[6], adjJt_loc[6], v_loc[2];
+    CeedScalar coeff[9], adjJt_loc[6], v_loc[2];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/32/hcurl_build_32_qf.h b/palace/fem/qfunctions/32/hcurl_build_32_qf.h
index eadd4bb4c..370491b71 100644
--- a/palace/fem/qfunctions/32/hcurl_build_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurl_build_32_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurl_32)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[6], qd_loc[3];
+    CeedScalar coeff[9], adjJt_loc[6], qd_loc[4];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     MultAtBA32(adjJt_loc, coeff, qd_loc);
@@ -23,6 +23,7 @@ CEED_QFUNCTION(f_build_hcurl_32)(void *__restrict__ ctx, CeedInt Q,
     qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
     qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
     qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/32/hcurlh1d_32_qf.h b/palace/fem/qfunctions/32/hcurlh1d_32_qf.h
index 1a60204f4..9c749daec 100644
--- a/palace/fem/qfunctions/32/hcurlh1d_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurlh1d_32_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[6], adjJt_loc[6], v_loc[3];
+    CeedScalar coeff[9], adjJt_loc[6], v_loc[3];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     MultBAx32(adjJt_loc, coeff, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/32/hcurlh1d_build_32_qf.h b/palace/fem/qfunctions/32/hcurlh1d_build_32_qf.h
index 2f3981df8..184bebeb1 100644
--- a/palace/fem/qfunctions/32/hcurlh1d_build_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurlh1d_build_32_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[6], qd_loc[6];
+    CeedScalar coeff[9], adjJt_loc[6], qd_loc[6];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     MultBA32(adjJt_loc, coeff, qd_loc);
diff --git a/palace/fem/qfunctions/32/hcurlhdiv_32_qf.h b/palace/fem/qfunctions/32/hcurlhdiv_32_qf.h
index f40245445..2f5e35b9d 100644
--- a/palace/fem/qfunctions/32/hcurlhdiv_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurlhdiv_32_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[2];
+    CeedScalar coeff[9], adjJt_loc[6], J_loc[6], v_loc[2];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     AdjJt32(adjJt_loc, J_loc);
@@ -37,7 +37,7 @@ CEED_QFUNCTION(f_apply_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[2];
+    CeedScalar coeff[9], adjJt_loc[6], J_loc[6], v_loc[2];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     AdjJt32(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/32/hcurlhdiv_build_32_qf.h b/palace/fem/qfunctions/32/hcurlhdiv_build_32_qf.h
index 4103f3db0..f091e0a30 100644
--- a/palace/fem/qfunctions/32/hcurlhdiv_build_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurlhdiv_build_32_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4];
+    CeedScalar coeff[9], adjJt_loc[6], J_loc[6], qd_loc[4];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     AdjJt32(adjJt_loc, J_loc);
@@ -37,7 +37,7 @@ CEED_QFUNCTION(f_build_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4];
+    CeedScalar coeff[9], adjJt_loc[6], J_loc[6], qd_loc[4];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     AdjJt32(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/32/hcurlmass_32_qf.h b/palace/fem/qfunctions/32/hcurlmass_32_qf.h
index 03d5d1f63..a3975b2bf 100644
--- a/palace/fem/qfunctions/32/hcurlmass_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurlmass_32_qf.h
@@ -24,7 +24,7 @@ CEED_QFUNCTION(f_apply_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q,
     }
     {
       const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]};
-      CeedScalar coeff[6], adjJt_loc[6], v_loc[2];
+      CeedScalar coeff[9], adjJt_loc[6], v_loc[2];
       CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack32(adjJt + i, Q, adjJt_loc);
       MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/32/hcurlmass_build_32_qf.h b/palace/fem/qfunctions/32/hcurlmass_build_32_qf.h
index cff513903..46b8e2a84 100644
--- a/palace/fem/qfunctions/32/hcurlmass_build_32_qf.h
+++ b/palace/fem/qfunctions/32/hcurlmass_build_32_qf.h
@@ -22,7 +22,7 @@ CEED_QFUNCTION(f_build_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = coeff * wdetJ[i];
     }
     {
-      CeedScalar coeff[6], adjJt_loc[6], qd_loc[3];
+      CeedScalar coeff[9], adjJt_loc[6], qd_loc[4];
       CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack32(adjJt + i, Q, adjJt_loc);
       MultAtBA32(adjJt_loc, coeff, qd_loc);
@@ -30,6 +30,7 @@ CEED_QFUNCTION(f_build_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q,
       qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
       qd2[i + Q * 1] = wdetJ[i] * qd_loc[1];
       qd2[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd2[i + Q * 3] = wdetJ[i] * qd_loc[3];
     }
   }
   return 0;
diff --git a/palace/fem/qfunctions/32/hdiv_32_qf.h b/palace/fem/qfunctions/32/hdiv_32_qf.h
index e67f4300d..deafc57df 100644
--- a/palace/fem/qfunctions/32/hdiv_32_qf.h
+++ b/palace/fem/qfunctions/32/hdiv_32_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hdiv_32)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[2];
+    CeedScalar coeff[9], adjJt_loc[6], J_loc[6], v_loc[2];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     AdjJt32(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/32/hdiv_build_32_qf.h b/palace/fem/qfunctions/32/hdiv_build_32_qf.h
index abe0bbdb1..80159f321 100644
--- a/palace/fem/qfunctions/32/hdiv_build_32_qf.h
+++ b/palace/fem/qfunctions/32/hdiv_build_32_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hdiv_32)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3];
+    CeedScalar coeff[9], adjJt_loc[6], J_loc[6], qd_loc[4];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack32(adjJt + i, Q, adjJt_loc);
     AdjJt32(adjJt_loc, J_loc);
@@ -24,6 +24,7 @@ CEED_QFUNCTION(f_build_hdiv_32)(void *__restrict__ ctx, CeedInt Q,
     qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
     qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
     qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/32/hdivmass_32_qf.h b/palace/fem/qfunctions/32/hdivmass_32_qf.h
index 865645fd3..aee469494 100644
--- a/palace/fem/qfunctions/32/hdivmass_32_qf.h
+++ b/palace/fem/qfunctions/32/hdivmass_32_qf.h
@@ -19,7 +19,7 @@ CEED_QFUNCTION(f_apply_hdivmass_32)(void *__restrict__ ctx, CeedInt Q,
   {
     {
       const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-      CeedScalar coeff[6], adjJt_loc[6], v_loc[2];
+      CeedScalar coeff[9], adjJt_loc[6], v_loc[2];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack32(adjJt + i, Q, adjJt_loc);
       MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/32/hdivmass_build_32_qf.h b/palace/fem/qfunctions/32/hdivmass_build_32_qf.h
index 2cad3a878..8116356f4 100644
--- a/palace/fem/qfunctions/32/hdivmass_build_32_qf.h
+++ b/palace/fem/qfunctions/32/hdivmass_build_32_qf.h
@@ -17,7 +17,7 @@ CEED_QFUNCTION(f_build_hdivmass_32)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     {
-      CeedScalar coeff[6], adjJt_loc[6], qd_loc[3];
+      CeedScalar coeff[9], adjJt_loc[6], qd_loc[4];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack32(adjJt + i, Q, adjJt_loc);
       MultAtBA32(adjJt_loc, coeff, qd_loc);
@@ -25,6 +25,7 @@ CEED_QFUNCTION(f_build_hdivmass_32)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
       qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
       qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
     }
     {
       const CeedScalar coeff =
diff --git a/palace/fem/qfunctions/32/l2mass_32_qf.h b/palace/fem/qfunctions/32/l2mass_32_qf.h
index 3eb59aae7..2d9966a24 100644
--- a/palace/fem/qfunctions/32/l2mass_32_qf.h
+++ b/palace/fem/qfunctions/32/l2mass_32_qf.h
@@ -19,7 +19,7 @@ CEED_QFUNCTION(f_apply_l2mass_32)(void *__restrict__ ctx, CeedInt Q,
   {
     {
       const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
-      CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[2];
+      CeedScalar coeff[9], adjJt_loc[6], J_loc[6], v_loc[2];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack32(adjJt + i, Q, adjJt_loc);
       AdjJt32(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/32/l2mass_build_32_qf.h b/palace/fem/qfunctions/32/l2mass_build_32_qf.h
index 68f558ac9..bd92b8535 100644
--- a/palace/fem/qfunctions/32/l2mass_build_32_qf.h
+++ b/palace/fem/qfunctions/32/l2mass_build_32_qf.h
@@ -12,12 +12,12 @@ CEED_QFUNCTION(f_build_l2mass_32)(void *__restrict__ ctx, CeedInt Q,
                                   const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
-  CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q;
+  CeedScalar *qd1 = out[0], *qd2 = out[0] + 4 * Q;
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     {
-      CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3];
+      CeedScalar coeff[9], adjJt_loc[6], J_loc[6], qd_loc[4];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack32(adjJt + i, Q, adjJt_loc);
       AdjJt32(adjJt_loc, J_loc);
@@ -26,6 +26,7 @@ CEED_QFUNCTION(f_build_l2mass_32)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
       qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
       qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
     }
     {
       const CeedScalar coeff =
diff --git a/palace/fem/qfunctions/32/utils_32_qf.h b/palace/fem/qfunctions/32/utils_32_qf.h
index bc283d0d1..cb8fe80d0 100644
--- a/palace/fem/qfunctions/32/utils_32_qf.h
+++ b/palace/fem/qfunctions/32/utils_32_qf.h
@@ -48,78 +48,79 @@ CEED_QFUNCTION_HELPER void MatUnpack32(const CeedScalar *A, const CeedInt A_stri
   A_loc[5] = A[A_stride * 5];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBCx32(const CeedScalar A[6], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultAtBCx32(const CeedScalar A[6], const CeedScalar B[9],
                                        const CeedScalar C[6], const CeedScalar x[2],
                                        CeedScalar y[2])
 {
-  // A: 0 3   B: 0 1 2   C: 0 3
-  //    1 4      1 3 4      1 4
-  //    2 5      2 4 5      2 5
+  // A: 0 3   B: 0 3 6   C: 0 3
+  //    1 4      1 4 7      1 4
+  //    2 5      2 5 8      2 5
   CeedScalar z[3], t;
 
   y[0] = C[0] * x[0] + C[3] * x[1];
   y[1] = C[1] * x[0] + C[4] * x[1];
   t = C[2] * x[0] + C[5] * x[1];
 
-  z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * t;
-  z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * t;
-  z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * t;
+  z[0] = B[0] * y[0] + B[3] * y[1] + B[6] * t;
+  z[1] = B[1] * y[0] + B[4] * y[1] + B[7] * t;
+  z[2] = B[2] * y[0] + B[5] * y[1] + B[8] * t;
 
   y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2];
   y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2];
 }
 
-CEED_QFUNCTION_HELPER void MultBAx32(const CeedScalar A[6], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultBAx32(const CeedScalar A[6], const CeedScalar B[9],
                                      const CeedScalar x[2], CeedScalar y[3])
 {
-  // A: 0 3   B: 0 1 2
-  //    1 4      1 3 4
-  //    2 5      2 4 5
+  // A: 0 3   B: 0 3 6
+  //    1 4      1 4 7
+  //    2 5      2 5 8
   CeedScalar z[3];
 
   z[0] = A[0] * x[0] + A[3] * x[1];
   z[1] = A[1] * x[0] + A[4] * x[1];
   z[2] = A[2] * x[0] + A[5] * x[1];
 
-  y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2];
-  y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2];
-  y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2];
+  y[0] = B[0] * z[0] + B[3] * z[1] + B[6] * z[2];
+  y[1] = B[1] * z[0] + B[4] * z[1] + B[7] * z[2];
+  y[2] = B[2] * z[0] + B[5] * z[1] + B[8] * z[2];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBA32(const CeedScalar A[6], const CeedScalar B[6],
-                                      CeedScalar C[3])
+CEED_QFUNCTION_HELPER void MultAtBA32(const CeedScalar A[6], const CeedScalar B[9],
+                                      CeedScalar C[4])
 {
-  // A: 0 3   B: 0 1 2   C: 0 1
-  //    1 4      1 3 4      1 2
-  //    2 5      2 4 5
+  // A: 0 3   B: 0 3 6   C: 0 2
+  //    1 4      1 4 7      1 3
+  //    2 5      2 5 8
 
   // First compute entries of R = B A.
-  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
-  const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
-  const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
-  const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
-  const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
-  const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
+  const CeedScalar R11 = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
+  const CeedScalar R21 = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
+  const CeedScalar R31 = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
+  const CeedScalar R12 = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
+  const CeedScalar R22 = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
+  const CeedScalar R32 = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
 
   C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
-  C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32;
-  C[2] = A[3] * R12 + A[4] * R22 + A[5] * R32;
+  C[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
+  C[2] = A[0] * R12 + A[1] * R22 + A[2] * R32;
+  C[3] = A[3] * R12 + A[4] * R22 + A[5] * R32;
 }
 
-CEED_QFUNCTION_HELPER void MultAtBC32(const CeedScalar A[6], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultAtBC32(const CeedScalar A[6], const CeedScalar B[9],
                                       const CeedScalar C[6], CeedScalar D[4])
 {
-  // A, C: 0 3   B: 0 1 2   D: 0 2
-  //       1 4      1 3 4      1 3
-  //       2 5      2 4 5
+  // A, C: 0 3   B: 0 3 6   D: 0 2
+  //       1 4      1 4 7      1 3
+  //       2 5      2 5 8
 
   // First compute entries of R = B C.
-  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2];
-  const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2];
-  const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2];
-  const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5];
-  const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5];
-  const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5];
+  const CeedScalar R11 = B[0] * C[0] + B[3] * C[1] + B[6] * C[2];
+  const CeedScalar R21 = B[1] * C[0] + B[4] * C[1] + B[7] * C[2];
+  const CeedScalar R31 = B[2] * C[0] + B[5] * C[1] + B[8] * C[2];
+  const CeedScalar R12 = B[0] * C[3] + B[3] * C[4] + B[6] * C[5];
+  const CeedScalar R22 = B[1] * C[3] + B[4] * C[4] + B[7] * C[5];
+  const CeedScalar R32 = B[2] * C[3] + B[5] * C[4] + B[8] * C[5];
 
   D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
   D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
@@ -127,18 +128,18 @@ CEED_QFUNCTION_HELPER void MultAtBC32(const CeedScalar A[6], const CeedScalar B[
   D[3] = A[3] * R12 + A[4] * R22 + A[5] * R32;
 }
 
-CEED_QFUNCTION_HELPER void MultBA32(const CeedScalar A[6], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultBA32(const CeedScalar A[6], const CeedScalar B[9],
                                     CeedScalar C[6])
 {
-  // A: 0 3   B: 0 1 2   C: 0 3
-  //    1 4      1 3 4      1 4
-  //    2 5      2 4 5      2 5
-  C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
-  C[1] = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
-  C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
-  C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
-  C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
-  C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
+  // A: 0 3   B: 0 3 6   C: 0 3
+  //    1 4      1 4 7      1 4
+  //    2 5      2 5 8      2 5
+  C[0] = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
+  C[1] = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
+  C[2] = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
+  C[3] = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
+  C[4] = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
+  C[5] = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
 }
 
 #endif  // PALACE_LIBCEED_UTILS_32_QF_H
diff --git a/palace/fem/qfunctions/33/hcurl_33_qf.h b/palace/fem/qfunctions/33/hcurl_33_qf.h
index 98803c9b4..f09668700 100644
--- a/palace/fem/qfunctions/33/hcurl_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurl_33_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurl_33)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
-    CeedScalar coeff[6], adjJt_loc[9], v_loc[3];
+    CeedScalar coeff[9], adjJt_loc[9], v_loc[3];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/33/hcurl_build_33_qf.h b/palace/fem/qfunctions/33/hcurl_build_33_qf.h
index f1f34be12..1a750bbf6 100644
--- a/palace/fem/qfunctions/33/hcurl_build_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurl_build_33_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurl_33)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[9], qd_loc[6];
+    CeedScalar coeff[9], adjJt_loc[9], qd_loc[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     MultAtBA33(adjJt_loc, coeff, qd_loc);
@@ -26,6 +26,9 @@ CEED_QFUNCTION(f_build_hcurl_33)(void *__restrict__ ctx, CeedInt Q,
     qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
     qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
     qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    qd[i + Q * 6] = wdetJ[i] * qd_loc[6];
+    qd[i + Q * 7] = wdetJ[i] * qd_loc[7];
+    qd[i + Q * 8] = wdetJ[i] * qd_loc[8];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/33/hcurlh1d_33_qf.h b/palace/fem/qfunctions/33/hcurlh1d_33_qf.h
index fde5ffc6c..d147a3034 100644
--- a/palace/fem/qfunctions/33/hcurlh1d_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlh1d_33_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
-    CeedScalar coeff[6], adjJt_loc[9], v_loc[3];
+    CeedScalar coeff[9], adjJt_loc[9], v_loc[3];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     MultBAx33(adjJt_loc, coeff, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/33/hcurlh1d_build_33_qf.h b/palace/fem/qfunctions/33/hcurlh1d_build_33_qf.h
index fb1a09587..163507cf2 100644
--- a/palace/fem/qfunctions/33/hcurlh1d_build_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlh1d_build_33_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[9], qd_loc[9];
+    CeedScalar coeff[9], adjJt_loc[9], qd_loc[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     MultBA33(adjJt_loc, coeff, qd_loc);
diff --git a/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h b/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h
index 47df58439..74ccb60d7 100644
--- a/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h
+++ b/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h
@@ -20,14 +20,14 @@ CEED_QFUNCTION(f_apply_hcurlh1d_error_22)(void *__restrict__ ctx, CeedInt Q,
     CeedScalar v1_loc[2], v2_loc[2];
     {
       const CeedScalar u1_loc[2] = {u1[i + Q * 0], u1[i + Q * 1]};
-      CeedScalar coeff[3], adjJt_loc[4];
+      CeedScalar coeff[4], adjJt_loc[4];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       MultBAx22(adjJt_loc, coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[2] = {u2[i + Q * 0], u2[i + Q * 1]};
-      CeedScalar coeff[3];
+      CeedScalar coeff[4];
       CoeffUnpack2(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MultBx22(coeff, u2_loc, v2_loc);
     }
@@ -51,13 +51,13 @@ CEED_QFUNCTION(f_apply_h1dhcurl_error_22)(void *__restrict__ ctx, CeedInt Q,
     CeedScalar v1_loc[2], v2_loc[2];
     {
       const CeedScalar u1_loc[2] = {u1[i + Q * 0], u1[i + Q * 1]};
-      CeedScalar coeff[3];
+      CeedScalar coeff[4];
       CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MultBx22(coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[2] = {u2[i + Q * 0], u2[i + Q * 1]};
-      CeedScalar coeff[3], adjJt_loc[4];
+      CeedScalar coeff[4], adjJt_loc[4];
       CoeffUnpack2(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack22(adjJt + i, Q, adjJt_loc);
       MultBAx22(adjJt_loc, coeff, u2_loc, v2_loc);
diff --git a/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h b/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h
index 1797f5159..0dd1e7b7b 100644
--- a/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h
@@ -20,14 +20,14 @@ CEED_QFUNCTION(f_apply_hcurlh1d_error_33)(void *__restrict__ ctx, CeedInt Q,
     CeedScalar v1_loc[3], v2_loc[3];
     {
       const CeedScalar u1_loc[3] = {u1[i + Q * 0], u1[i + Q * 1], u1[i + Q * 2]};
-      CeedScalar coeff[6], adjJt_loc[9];
+      CeedScalar coeff[9], adjJt_loc[9];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack33(adjJt + i, Q, adjJt_loc);
       MultBAx33(adjJt_loc, coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[3] = {u2[i + Q * 0], u2[i + Q * 1], u2[i + Q * 2]};
-      CeedScalar coeff[6];
+      CeedScalar coeff[9];
       CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MultBx33(coeff, u2_loc, v2_loc);
     }
@@ -53,13 +53,13 @@ CEED_QFUNCTION(f_apply_h1dhcurl_error_33)(void *__restrict__ ctx, CeedInt Q,
     CeedScalar v1_loc[3], v2_loc[3];
     {
       const CeedScalar u1_loc[3] = {u1[i + Q * 0], u1[i + Q * 1], u1[i + Q * 2]};
-      CeedScalar coeff[6];
+      CeedScalar coeff[9];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MultBx33(coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[3] = {u2[i + Q * 0], u2[i + Q * 1], u2[i + Q * 2]};
-      CeedScalar coeff[6], adjJt_loc[9];
+      CeedScalar coeff[9], adjJt_loc[9];
       CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack33(adjJt + i, Q, adjJt_loc);
       MultBAx33(adjJt_loc, coeff, u2_loc, v2_loc);
diff --git a/palace/fem/qfunctions/33/hcurlhdiv_33_qf.h b/palace/fem/qfunctions/33/hcurlhdiv_33_qf.h
index bebbddda2..a8ce54d3e 100644
--- a/palace/fem/qfunctions/33/hcurlhdiv_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlhdiv_33_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
-    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+    CeedScalar coeff[9], adjJt_loc[9], J_loc[9], v_loc[3];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     AdjJt33(adjJt_loc, J_loc);
@@ -38,7 +38,7 @@ CEED_QFUNCTION(f_apply_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
-    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+    CeedScalar coeff[9], adjJt_loc[9], J_loc[9], v_loc[3];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     AdjJt33(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/33/hcurlhdiv_build_33_qf.h b/palace/fem/qfunctions/33/hcurlhdiv_build_33_qf.h
index 971519beb..3abacec3d 100644
--- a/palace/fem/qfunctions/33/hcurlhdiv_build_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlhdiv_build_33_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[9];
+    CeedScalar coeff[9], adjJt_loc[9], J_loc[9], qd_loc[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     AdjJt33(adjJt_loc, J_loc);
@@ -42,7 +42,7 @@ CEED_QFUNCTION(f_build_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[9];
+    CeedScalar coeff[9], adjJt_loc[9], J_loc[9], qd_loc[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     AdjJt33(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h b/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h
index 954310fb3..dce1a8c92 100644
--- a/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h
@@ -21,13 +21,13 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_error_33)(void *__restrict__ ctx, CeedInt Q,
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     {
       const CeedScalar u1_loc[3] = {u1[i + Q * 0], u1[i + Q * 1], u1[i + Q * 2]};
-      CeedScalar coeff[6];
+      CeedScalar coeff[9];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MultBAx33(adjJt_loc, coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[3] = {u2[i + Q * 0], u2[i + Q * 1], u2[i + Q * 2]};
-      CeedScalar coeff[6], J_loc[9];
+      CeedScalar coeff[9], J_loc[9];
       CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       AdjJt33(adjJt_loc, J_loc);
       MultBAx33(J_loc, coeff, u2_loc, v2_loc);
@@ -55,14 +55,14 @@ CEED_QFUNCTION(f_apply_hdivhcurl_error_33)(void *__restrict__ ctx, CeedInt Q,
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     {
       const CeedScalar u1_loc[3] = {u1[i + Q * 0], u1[i + Q * 1], u1[i + Q * 2]};
-      CeedScalar coeff[6], J_loc[9];
+      CeedScalar coeff[9], J_loc[9];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       AdjJt33(adjJt_loc, J_loc);
       MultBAx33(J_loc, coeff, u1_loc, v1_loc);
     }
     {
       const CeedScalar u2_loc[3] = {u2[i + Q * 0], u2[i + Q * 1], u2[i + Q * 2]};
-      CeedScalar coeff[6];
+      CeedScalar coeff[9];
       CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MultBAx33(adjJt_loc, coeff, u2_loc, v2_loc);
     }
diff --git a/palace/fem/qfunctions/33/hcurlmass_33_qf.h b/palace/fem/qfunctions/33/hcurlmass_33_qf.h
index 71563d953..03b88b1ff 100644
--- a/palace/fem/qfunctions/33/hcurlmass_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlmass_33_qf.h
@@ -24,7 +24,7 @@ CEED_QFUNCTION(f_apply_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q,
     }
     {
       const CeedScalar u_loc[3] = {gradu[i + Q * 0], gradu[i + Q * 1], gradu[i + Q * 2]};
-      CeedScalar coeff[6], adjJt_loc[9], v_loc[3];
+      CeedScalar coeff[9], adjJt_loc[9], v_loc[3];
       CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack33(adjJt + i, Q, adjJt_loc);
       MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/33/hcurlmass_build_33_qf.h b/palace/fem/qfunctions/33/hcurlmass_build_33_qf.h
index ea43256ef..2408b1db6 100644
--- a/palace/fem/qfunctions/33/hcurlmass_build_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlmass_build_33_qf.h
@@ -22,7 +22,7 @@ CEED_QFUNCTION(f_build_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 0] = coeff * wdetJ[i];
     }
     {
-      CeedScalar coeff[6], adjJt_loc[9], qd_loc[6];
+      CeedScalar coeff[9], adjJt_loc[9], qd_loc[9];
       CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       MatUnpack33(adjJt + i, Q, adjJt_loc);
       MultAtBA33(adjJt_loc, coeff, qd_loc);
@@ -33,6 +33,9 @@ CEED_QFUNCTION(f_build_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q,
       qd2[i + Q * 3] = wdetJ[i] * qd_loc[3];
       qd2[i + Q * 4] = wdetJ[i] * qd_loc[4];
       qd2[i + Q * 5] = wdetJ[i] * qd_loc[5];
+      qd2[i + Q * 6] = wdetJ[i] * qd_loc[6];
+      qd2[i + Q * 7] = wdetJ[i] * qd_loc[7];
+      qd2[i + Q * 8] = wdetJ[i] * qd_loc[8];
     }
   }
   return 0;
diff --git a/palace/fem/qfunctions/33/hdiv_33_qf.h b/palace/fem/qfunctions/33/hdiv_33_qf.h
index 34cec106c..018c96e78 100644
--- a/palace/fem/qfunctions/33/hdiv_33_qf.h
+++ b/palace/fem/qfunctions/33/hdiv_33_qf.h
@@ -16,7 +16,7 @@ CEED_QFUNCTION(f_apply_hdiv_33)(void *__restrict__ ctx, CeedInt Q,
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
-    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+    CeedScalar coeff[9], adjJt_loc[9], J_loc[9], v_loc[3];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     AdjJt33(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/33/hdiv_build_33_qf.h b/palace/fem/qfunctions/33/hdiv_build_33_qf.h
index 1989dbdc5..a1fb151de 100644
--- a/palace/fem/qfunctions/33/hdiv_build_33_qf.h
+++ b/palace/fem/qfunctions/33/hdiv_build_33_qf.h
@@ -15,7 +15,7 @@ CEED_QFUNCTION(f_build_hdiv_33)(void *__restrict__ ctx, CeedInt Q,
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6];
+    CeedScalar coeff[9], adjJt_loc[9], J_loc[9], qd_loc[9];
     CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     AdjJt33(adjJt_loc, J_loc);
@@ -27,6 +27,9 @@ CEED_QFUNCTION(f_build_hdiv_33)(void *__restrict__ ctx, CeedInt Q,
     qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
     qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
     qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    qd[i + Q * 6] = wdetJ[i] * qd_loc[6];
+    qd[i + Q * 7] = wdetJ[i] * qd_loc[7];
+    qd[i + Q * 8] = wdetJ[i] * qd_loc[8];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/33/hdivmass_33_qf.h b/palace/fem/qfunctions/33/hdivmass_33_qf.h
index 062cacc13..f0ae3192f 100644
--- a/palace/fem/qfunctions/33/hdivmass_33_qf.h
+++ b/palace/fem/qfunctions/33/hdivmass_33_qf.h
@@ -20,7 +20,7 @@ CEED_QFUNCTION(f_apply_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     {
       const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
-      CeedScalar coeff[6], v_loc[3];
+      CeedScalar coeff[9], v_loc[3];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
 
@@ -30,7 +30,7 @@ CEED_QFUNCTION(f_apply_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
     }
     {
       const CeedScalar u_loc[3] = {curlu[i + Q * 0], curlu[i + Q * 1], curlu[i + Q * 2]};
-      CeedScalar coeff[6], J_loc[9], v_loc[3];
+      CeedScalar coeff[9], J_loc[9], v_loc[3];
       CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       AdjJt33(adjJt_loc, J_loc);
       MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc);
diff --git a/palace/fem/qfunctions/33/hdivmass_build_33_qf.h b/palace/fem/qfunctions/33/hdivmass_build_33_qf.h
index 32b2eafe3..333936721 100644
--- a/palace/fem/qfunctions/33/hdivmass_build_33_qf.h
+++ b/palace/fem/qfunctions/33/hdivmass_build_33_qf.h
@@ -11,14 +11,14 @@ CEED_QFUNCTION(f_build_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
-  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 6 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 9 * Q;
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     CeedScalar adjJt_loc[9];
     MatUnpack33(adjJt + i, Q, adjJt_loc);
     {
-      CeedScalar coeff[6], qd_loc[6];
+      CeedScalar coeff[9], qd_loc[9];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MultAtBA33(adjJt_loc, coeff, qd_loc);
 
@@ -28,9 +28,12 @@ CEED_QFUNCTION(f_build_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
       qd1[i + Q * 4] = wdetJ[i] * qd_loc[4];
       qd1[i + Q * 5] = wdetJ[i] * qd_loc[5];
+      qd1[i + Q * 6] = wdetJ[i] * qd_loc[6];
+      qd1[i + Q * 7] = wdetJ[i] * qd_loc[7];
+      qd1[i + Q * 8] = wdetJ[i] * qd_loc[8];
     }
     {
-      CeedScalar coeff[6], J_loc[9], qd_loc[6];
+      CeedScalar coeff[9], J_loc[9], qd_loc[9];
       CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
       AdjJt33(adjJt_loc, J_loc);
       MultAtBA33(J_loc, coeff, qd_loc);
@@ -41,6 +44,9 @@ CEED_QFUNCTION(f_build_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
       qd2[i + Q * 3] = wdetJ[i] * qd_loc[3];
       qd2[i + Q * 4] = wdetJ[i] * qd_loc[4];
       qd2[i + Q * 5] = wdetJ[i] * qd_loc[5];
+      qd2[i + Q * 6] = wdetJ[i] * qd_loc[6];
+      qd2[i + Q * 7] = wdetJ[i] * qd_loc[7];
+      qd2[i + Q * 8] = wdetJ[i] * qd_loc[8];
     }
   }
   return 0;
diff --git a/palace/fem/qfunctions/33/l2mass_33_qf.h b/palace/fem/qfunctions/33/l2mass_33_qf.h
index d6a716ded..39eeb19bf 100644
--- a/palace/fem/qfunctions/33/l2mass_33_qf.h
+++ b/palace/fem/qfunctions/33/l2mass_33_qf.h
@@ -19,7 +19,7 @@ CEED_QFUNCTION(f_apply_l2mass_33)(void *__restrict__ ctx, CeedInt Q,
   {
     {
       const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
-      CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+      CeedScalar coeff[9], adjJt_loc[9], J_loc[9], v_loc[3];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack33(adjJt + i, Q, adjJt_loc);
       AdjJt33(adjJt_loc, J_loc);
diff --git a/palace/fem/qfunctions/33/l2mass_build_33_qf.h b/palace/fem/qfunctions/33/l2mass_build_33_qf.h
index c66aa4ba7..2427fd776 100644
--- a/palace/fem/qfunctions/33/l2mass_build_33_qf.h
+++ b/palace/fem/qfunctions/33/l2mass_build_33_qf.h
@@ -12,12 +12,12 @@ CEED_QFUNCTION(f_build_l2mass_33)(void *__restrict__ ctx, CeedInt Q,
                                   const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
-  CeedScalar *qd1 = out[0], *qd2 = out[0] + 6 * Q;
+  CeedScalar *qd1 = out[0], *qd2 = out[0] + 9 * Q;
 
   CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
     {
-      CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6];
+      CeedScalar coeff[9], adjJt_loc[9], J_loc[9], qd_loc[9];
       CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
       MatUnpack33(adjJt + i, Q, adjJt_loc);
       AdjJt33(adjJt_loc, J_loc);
@@ -29,6 +29,9 @@ CEED_QFUNCTION(f_build_l2mass_33)(void *__restrict__ ctx, CeedInt Q,
       qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
       qd1[i + Q * 4] = wdetJ[i] * qd_loc[4];
       qd1[i + Q * 5] = wdetJ[i] * qd_loc[5];
+      qd1[i + Q * 6] = wdetJ[i] * qd_loc[6];
+      qd1[i + Q * 7] = wdetJ[i] * qd_loc[7];
+      qd1[i + Q * 8] = wdetJ[i] * qd_loc[8];
     }
     {
       const CeedScalar coeff =
diff --git a/palace/fem/qfunctions/33/utils_33_qf.h b/palace/fem/qfunctions/33/utils_33_qf.h
index fb7d89c48..0347434f4 100644
--- a/palace/fem/qfunctions/33/utils_33_qf.h
+++ b/palace/fem/qfunctions/33/utils_33_qf.h
@@ -48,99 +48,102 @@ CEED_QFUNCTION_HELPER void MatUnpack33(const CeedScalar *A, const CeedInt A_stri
   A_loc[8] = A[A_stride * 8];
 }
 
-CEED_QFUNCTION_HELPER void MultBx33(const CeedScalar B[6], const CeedScalar x[3],
+CEED_QFUNCTION_HELPER void MultBx33(const CeedScalar B[9], const CeedScalar x[3],
                                     CeedScalar y[3])
 {
-  // B: 0 1 2
-  //    1 3 4
-  //    2 4 5
-  y[0] = B[0] * x[0] + B[1] * x[1] + B[2] * x[2];
-  y[1] = B[1] * x[0] + B[3] * x[1] + B[4] * x[2];
-  y[2] = B[2] * x[0] + B[4] * x[1] + B[5] * x[2];
+  // B: 0 3 6
+  //    1 4 7
+  //    2 5 8
+  y[0] = B[0] * x[0] + B[3] * x[1] + B[6] * x[2];
+  y[1] = B[1] * x[0] + B[4] * x[1] + B[7] * x[2];
+  y[2] = B[2] * x[0] + B[5] * x[1] + B[8] * x[2];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBCx33(const CeedScalar A[9], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultAtBCx33(const CeedScalar A[9], const CeedScalar B[9],
                                        const CeedScalar C[9], const CeedScalar x[3],
                                        CeedScalar y[3])
 {
-  // A: 0 3 6   B: 0 1 2   C: 0 3 6
-  //    1 4 7      1 3 4      1 4 7
-  //    2 5 8      2 4 5      2 5 8
+  // A: 0 3 6   B: 0 3 6   C: 0 3 6
+  //    1 4 7      1 4 7      1 4 7
+  //    2 5 8      2 5 8      2 5 8
   CeedScalar z[3];
 
   y[0] = C[0] * x[0] + C[3] * x[1] + C[6] * x[2];
   y[1] = C[1] * x[0] + C[4] * x[1] + C[7] * x[2];
   y[2] = C[2] * x[0] + C[5] * x[1] + C[8] * x[2];
 
-  z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * y[2];
-  z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * y[2];
-  z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * y[2];
+  z[0] = B[0] * y[0] + B[3] * y[1] + B[6] * y[2];
+  z[1] = B[1] * y[0] + B[4] * y[1] + B[7] * y[2];
+  z[2] = B[2] * y[0] + B[5] * y[1] + B[8] * y[2];
 
   y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2];
   y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2];
   y[2] = A[6] * z[0] + A[7] * z[1] + A[8] * z[2];
 }
 
-CEED_QFUNCTION_HELPER void MultBAx33(const CeedScalar A[9], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultBAx33(const CeedScalar A[9], const CeedScalar B[9],
                                      const CeedScalar x[3], CeedScalar y[3])
 {
-  // A: 0 3 6   B: 0 1 2
-  //    1 4 7      1 3 4
-  //    2 5 8      2 4 5
+  // A: 0 3 6   B: 0 3 6
+  //    1 4 7      1 4 7
+  //    2 5 8      2 5 8
   CeedScalar z[3];
 
   z[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2];
   z[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2];
   z[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2];
 
-  y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2];
-  y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2];
-  y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2];
+  y[0] = B[0] * z[0] + B[3] * z[1] + B[6] * z[2];
+  y[1] = B[1] * z[0] + B[4] * z[1] + B[7] * z[2];
+  y[2] = B[2] * z[0] + B[5] * z[1] + B[8] * z[2];
 }
 
-CEED_QFUNCTION_HELPER void MultAtBA33(const CeedScalar A[9], const CeedScalar B[6],
-                                      CeedScalar C[6])
+CEED_QFUNCTION_HELPER void MultAtBA33(const CeedScalar A[9], const CeedScalar B[9],
+                                      CeedScalar C[9])
 {
-  // A: 0 3 6   B: 0 1 2   C: 0 1 2
-  //    1 4 7      1 3 4      1 3 4
-  //    2 5 8      2 4 5      2 4 5
+  // A: 0 3 6   B: 0 3 6   C: 0 3 6
+  //    1 4 7      1 4 7      1 4 7
+  //    2 5 8      2 5 8      2 5 8
 
   // First compute entries of R = B A.
-  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
-  const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
-  const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
-  const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
-  const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
-  const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
-  const CeedScalar R13 = B[0] * A[6] + B[1] * A[7] + B[2] * A[8];
-  const CeedScalar R23 = B[1] * A[6] + B[3] * A[7] + B[4] * A[8];
-  const CeedScalar R33 = B[2] * A[6] + B[4] * A[7] + B[5] * A[8];
+  const CeedScalar R11 = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
+  const CeedScalar R21 = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
+  const CeedScalar R31 = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
+  const CeedScalar R12 = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
+  const CeedScalar R22 = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
+  const CeedScalar R32 = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
+  const CeedScalar R13 = B[0] * A[6] + B[3] * A[7] + B[6] * A[8];
+  const CeedScalar R23 = B[1] * A[6] + B[4] * A[7] + B[7] * A[8];
+  const CeedScalar R33 = B[2] * A[6] + B[5] * A[7] + B[8] * A[8];
 
   C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
-  C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32;
-  C[2] = A[0] * R13 + A[1] * R23 + A[2] * R33;
-  C[3] = A[3] * R12 + A[4] * R22 + A[5] * R32;
-  C[4] = A[3] * R13 + A[4] * R23 + A[5] * R33;
-  C[5] = A[6] * R13 + A[7] * R23 + A[8] * R33;
+  C[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
+  C[2] = A[6] * R11 + A[7] * R21 + A[8] * R31;
+  C[3] = A[0] * R12 + A[1] * R22 + A[2] * R32;
+  C[4] = A[3] * R12 + A[4] * R22 + A[5] * R32;
+  C[5] = A[6] * R12 + A[7] * R22 + A[8] * R32;
+  C[6] = A[0] * R13 + A[1] * R23 + A[2] * R33;
+  C[7] = A[3] * R13 + A[4] * R23 + A[5] * R33;
+  C[8] = A[6] * R13 + A[7] * R23 + A[8] * R33;
 }
 
-CEED_QFUNCTION_HELPER void MultAtBC33(const CeedScalar A[9], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultAtBC33(const CeedScalar A[9], const CeedScalar B[9],
                                       const CeedScalar C[9], CeedScalar D[9])
 {
-  // A, C: 0 3 6   B: 0 1 2   D: 0 3 6
-  //       1 4 7      1 3 4      1 4 7
-  //       2 5 8      2 4 5      2 5 8
+  // A, C: 0 3 6   B: 0 3 6   D: 0 3 6
+  //       1 4 7      1 4 7      1 4 7
+  //       2 5 8      2 5 8      2 5 8
 
   // First compute entries of R = B C.
-  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2];
-  const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2];
-  const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2];
-  const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5];
-  const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5];
-  const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5];
-  const CeedScalar R13 = B[0] * C[6] + B[1] * C[7] + B[2] * C[8];
-  const CeedScalar R23 = B[1] * C[6] + B[3] * C[7] + B[4] * C[8];
-  const CeedScalar R33 = B[2] * C[6] + B[4] * C[7] + B[5] * C[8];
+  const CeedScalar R11 = B[0] * C[0] + B[3] * C[1] + B[6] * C[2];
+  const CeedScalar R21 = B[1] * C[0] + B[4] * C[1] + B[7] * C[2];
+  const CeedScalar R31 = B[2] * C[0] + B[5] * C[1] + B[8] * C[2];
+  const CeedScalar R12 = B[0] * C[3] + B[3] * C[4] + B[6] * C[5];
+  const CeedScalar R22 = B[1] * C[3] + B[4] * C[4] + B[7] * C[5];
+  const CeedScalar R32 = B[2] * C[3] + B[5] * C[4] + B[8] * C[5];
+  const CeedScalar R13 = B[0] * C[6] + B[3] * C[7] + B[6] * C[8];
+  const CeedScalar R23 = B[1] * C[6] + B[4] * C[7] + B[7] * C[8];
+  const CeedScalar R33 = B[2] * C[6] + B[5] * C[7] + B[8] * C[8];
 
   D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
   D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
@@ -153,21 +156,21 @@ CEED_QFUNCTION_HELPER void MultAtBC33(const CeedScalar A[9], const CeedScalar B[
   D[8] = A[6] * R13 + A[7] * R23 + A[8] * R33;
 }
 
-CEED_QFUNCTION_HELPER void MultBA33(const CeedScalar A[9], const CeedScalar B[6],
+CEED_QFUNCTION_HELPER void MultBA33(const CeedScalar A[9], const CeedScalar B[9],
                                     CeedScalar C[9])
 {
-  // A: 0 3 6   B: 0 1 2   C: 0 3 6
-  //    1 4 7      1 3 4      1 4 7
-  //    2 5 8      2 4 5      2 5 8
-  C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
-  C[1] = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
-  C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
-  C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
-  C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
-  C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
-  C[6] = B[0] * A[6] + B[1] * A[7] + B[2] * A[8];
-  C[7] = B[1] * A[6] + B[3] * A[7] + B[4] * A[8];
-  C[8] = B[2] * A[6] + B[4] * A[7] + B[5] * A[8];
+  // A: 0 3 6   B: 0 3 6   C: 0 3 6
+  //    1 4 7      1 4 7      1 4 7
+  //    2 5 8      2 5 8      2 5 8
+  C[0] = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
+  C[1] = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
+  C[2] = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
+  C[3] = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
+  C[4] = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
+  C[5] = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
+  C[6] = B[0] * A[6] + B[3] * A[7] + B[6] * A[8];
+  C[7] = B[1] * A[6] + B[4] * A[7] + B[7] * A[8];
+  C[8] = B[2] * A[6] + B[5] * A[7] + B[8] * A[8];
 }
 
 #endif  // PALACE_LIBCEED_UTILS_33_QF_H
diff --git a/palace/fem/qfunctions/apply/apply_12_qf.h b/palace/fem/qfunctions/apply/apply_12_qf.h
index b93c4fded..2252e2d16 100644
--- a/palace/fem/qfunctions/apply/apply_12_qf.h
+++ b/palace/fem/qfunctions/apply/apply_12_qf.h
@@ -17,8 +17,8 @@ CEED_QFUNCTION(f_apply_12)(void *, CeedInt Q, const CeedScalar *const *in,
 
     const CeedScalar u20 = u2[i + Q * 0];
     const CeedScalar u21 = u2[i + Q * 1];
-    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21;
-    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 2] * u21;
+    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 2] * u21;
+    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21;
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/apply/apply_13_qf.h b/palace/fem/qfunctions/apply/apply_13_qf.h
index 543be0489..379d35521 100644
--- a/palace/fem/qfunctions/apply/apply_13_qf.h
+++ b/palace/fem/qfunctions/apply/apply_13_qf.h
@@ -18,9 +18,9 @@ CEED_QFUNCTION(f_apply_13)(void *, CeedInt Q, const CeedScalar *const *in,
     const CeedScalar u20 = u2[i + Q * 0];
     const CeedScalar u21 = u2[i + Q * 1];
     const CeedScalar u22 = u2[i + Q * 2];
-    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21 + qd2[i + Q * 2] * u22;
-    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 4] * u22;
-    v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 5] * u22;
+    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 6] * u22;
+    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 7] * u22;
+    v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 5] * u21 + qd2[i + Q * 8] * u22;
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/apply/apply_21_qf.h b/palace/fem/qfunctions/apply/apply_21_qf.h
index d9b61f93f..2065efe96 100644
--- a/palace/fem/qfunctions/apply/apply_21_qf.h
+++ b/palace/fem/qfunctions/apply/apply_21_qf.h
@@ -7,7 +7,7 @@
 CEED_QFUNCTION(f_apply_21)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
-  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q,
+  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 4 * Q,
                                  *__restrict__ u1 = in[1], *__restrict__ u2 = in[2];
   CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1];
 
@@ -15,8 +15,8 @@ CEED_QFUNCTION(f_apply_21)(void *, CeedInt Q, const CeedScalar *const *in,
   {
     const CeedScalar u10 = u1[i + Q * 0];
     const CeedScalar u11 = u1[i + Q * 1];
-    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11;
-    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 2] * u11;
+    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 2] * u11;
+    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 3] * u11;
 
     v2[i] = qd2[i] * u2[i];
   }
diff --git a/palace/fem/qfunctions/apply/apply_22_qf.h b/palace/fem/qfunctions/apply/apply_22_qf.h
index bc81094b9..36f7bef8e 100644
--- a/palace/fem/qfunctions/apply/apply_22_qf.h
+++ b/palace/fem/qfunctions/apply/apply_22_qf.h
@@ -7,7 +7,7 @@
 CEED_QFUNCTION(f_apply_22)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
-  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q,
+  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 4 * Q,
                                  *__restrict__ u1 = in[1], *__restrict__ u2 = in[2];
   CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1];
 
@@ -15,13 +15,13 @@ CEED_QFUNCTION(f_apply_22)(void *, CeedInt Q, const CeedScalar *const *in,
   {
     const CeedScalar u10 = u1[i + Q * 0];
     const CeedScalar u11 = u1[i + Q * 1];
-    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11;
-    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 2] * u11;
+    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 2] * u11;
+    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 3] * u11;
 
     const CeedScalar u20 = u2[i + Q * 0];
     const CeedScalar u21 = u2[i + Q * 1];
-    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21;
-    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 2] * u21;
+    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 2] * u21;
+    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21;
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/apply/apply_2_qf.h b/palace/fem/qfunctions/apply/apply_2_qf.h
index 8329f2732..b5f6034d9 100644
--- a/palace/fem/qfunctions/apply/apply_2_qf.h
+++ b/palace/fem/qfunctions/apply/apply_2_qf.h
@@ -14,8 +14,8 @@ CEED_QFUNCTION(f_apply_2)(void *, CeedInt Q, const CeedScalar *const *in,
   {
     const CeedScalar u0 = u[i + Q * 0];
     const CeedScalar u1 = u[i + Q * 1];
-    v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1;
-    v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 2] * u1;
+    v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 2] * u1;
+    v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1;
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/apply/apply_31_qf.h b/palace/fem/qfunctions/apply/apply_31_qf.h
index 853b5cb41..bcff27902 100644
--- a/palace/fem/qfunctions/apply/apply_31_qf.h
+++ b/palace/fem/qfunctions/apply/apply_31_qf.h
@@ -7,7 +7,7 @@
 CEED_QFUNCTION(f_apply_31)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
-  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q,
+  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 9 * Q,
                                  *__restrict__ u1 = in[1], *__restrict__ u2 = in[2];
   CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1];
 
@@ -16,9 +16,9 @@ CEED_QFUNCTION(f_apply_31)(void *, CeedInt Q, const CeedScalar *const *in,
     const CeedScalar u10 = u1[i + Q * 0];
     const CeedScalar u11 = u1[i + Q * 1];
     const CeedScalar u12 = u1[i + Q * 2];
-    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11 + qd1[i + Q * 2] * u12;
-    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 4] * u12;
-    v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 5] * u12;
+    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 6] * u12;
+    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 7] * u12;
+    v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 5] * u11 + qd1[i + Q * 8] * u12;
 
     v2[i] = qd2[i] * u2[i];
   }
diff --git a/palace/fem/qfunctions/apply/apply_33_qf.h b/palace/fem/qfunctions/apply/apply_33_qf.h
index ea9781584..4eafd1d51 100644
--- a/palace/fem/qfunctions/apply/apply_33_qf.h
+++ b/palace/fem/qfunctions/apply/apply_33_qf.h
@@ -7,7 +7,7 @@
 CEED_QFUNCTION(f_apply_33)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
-  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q,
+  const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 9 * Q,
                                  *__restrict__ u1 = in[1], *__restrict__ u2 = in[2];
   CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1];
 
@@ -16,16 +16,16 @@ CEED_QFUNCTION(f_apply_33)(void *, CeedInt Q, const CeedScalar *const *in,
     const CeedScalar u10 = u1[i + Q * 0];
     const CeedScalar u11 = u1[i + Q * 1];
     const CeedScalar u12 = u1[i + Q * 2];
-    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11 + qd1[i + Q * 2] * u12;
-    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 4] * u12;
-    v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 5] * u12;
+    v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 6] * u12;
+    v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 7] * u12;
+    v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 5] * u11 + qd1[i + Q * 8] * u12;
 
     const CeedScalar u20 = u2[i + Q * 0];
     const CeedScalar u21 = u2[i + Q * 1];
     const CeedScalar u22 = u2[i + Q * 2];
-    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21 + qd2[i + Q * 2] * u22;
-    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 4] * u22;
-    v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 5] * u22;
+    v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 6] * u22;
+    v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 7] * u22;
+    v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 5] * u21 + qd2[i + Q * 8] * u22;
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/apply/apply_3_qf.h b/palace/fem/qfunctions/apply/apply_3_qf.h
index a33bac466..ab9b4240a 100644
--- a/palace/fem/qfunctions/apply/apply_3_qf.h
+++ b/palace/fem/qfunctions/apply/apply_3_qf.h
@@ -15,9 +15,9 @@ CEED_QFUNCTION(f_apply_3)(void *, CeedInt Q, const CeedScalar *const *in,
     const CeedScalar u0 = u[i + Q * 0];
     const CeedScalar u1 = u[i + Q * 1];
     const CeedScalar u2 = u[i + Q * 2];
-    v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1 + qd[i + Q * 2] * u2;
-    v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 4] * u2;
-    v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 5] * u2;
+    v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 6] * u2;
+    v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 7] * u2;
+    v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 5] * u1 + qd[i + Q * 8] * u2;
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/coeff/coeff_2_qf.h b/palace/fem/qfunctions/coeff/coeff_2_qf.h
index bdb69c08f..efa09b32c 100644
--- a/palace/fem/qfunctions/coeff/coeff_2_qf.h
+++ b/palace/fem/qfunctions/coeff/coeff_2_qf.h
@@ -7,13 +7,14 @@
 #include "coeff_qf.h"
 
 CEED_QFUNCTION_HELPER void CoeffUnpack2(const CeedIntScalar *ctx, const CeedInt attr,
-                                        CeedScalar coeff[3])
+                                        CeedScalar coeff[4])
 {
   const CeedInt k = (NumAttr(ctx) > 0) ? AttrMat(ctx)[attr - 1].first : 0;
   const CeedIntScalar *mat_coeff = MatCoeff(ctx);
-  coeff[0] = mat_coeff[3 * k + 0].second;
-  coeff[1] = mat_coeff[3 * k + 1].second;
-  coeff[2] = mat_coeff[3 * k + 2].second;
+  coeff[0] = mat_coeff[4 * k + 0].second;
+  coeff[1] = mat_coeff[4 * k + 1].second;
+  coeff[2] = mat_coeff[4 * k + 2].second;
+  coeff[3] = mat_coeff[4 * k + 3].second;
 }
 
 #endif  // PALACE_LIBCEED_COEFF_2_QF_H
diff --git a/palace/fem/qfunctions/coeff/coeff_3_qf.h b/palace/fem/qfunctions/coeff/coeff_3_qf.h
index 72ff76255..acf339bfc 100644
--- a/palace/fem/qfunctions/coeff/coeff_3_qf.h
+++ b/palace/fem/qfunctions/coeff/coeff_3_qf.h
@@ -7,16 +7,19 @@
 #include "coeff_qf.h"
 
 CEED_QFUNCTION_HELPER void CoeffUnpack3(const CeedIntScalar *ctx, const CeedInt attr,
-                                        CeedScalar coeff[6])
+                                        CeedScalar coeff[9])
 {
   const CeedInt k = (NumAttr(ctx) > 0) ? AttrMat(ctx)[attr - 1].first : 0;
   const CeedIntScalar *mat_coeff = MatCoeff(ctx);
-  coeff[0] = mat_coeff[6 * k + 0].second;
-  coeff[1] = mat_coeff[6 * k + 1].second;
-  coeff[2] = mat_coeff[6 * k + 2].second;
-  coeff[3] = mat_coeff[6 * k + 3].second;
-  coeff[4] = mat_coeff[6 * k + 4].second;
-  coeff[5] = mat_coeff[6 * k + 5].second;
+  coeff[0] = mat_coeff[9 * k + 0].second;
+  coeff[1] = mat_coeff[9 * k + 1].second;
+  coeff[2] = mat_coeff[9 * k + 2].second;
+  coeff[3] = mat_coeff[9 * k + 3].second;
+  coeff[4] = mat_coeff[9 * k + 4].second;
+  coeff[5] = mat_coeff[9 * k + 5].second;
+  coeff[6] = mat_coeff[9 * k + 6].second;
+  coeff[7] = mat_coeff[9 * k + 7].second;
+  coeff[8] = mat_coeff[9 * k + 8].second;
 }
 
 #endif  // PALACE_LIBCEED_COEFF_3_QF_H
diff --git a/palace/fem/qfunctions/coeff/coeff_qf.h b/palace/fem/qfunctions/coeff/coeff_qf.h
index 0f0154d66..d1bdf9fc6 100644
--- a/palace/fem/qfunctions/coeff/coeff_qf.h
+++ b/palace/fem/qfunctions/coeff/coeff_qf.h
@@ -39,7 +39,7 @@ CEED_QFUNCTION_HELPER const CeedIntScalar *MatCoeff(const CeedIntScalar *ctx)
 template <int DIM>
 CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond(const CeedIntScalar *ctx)
 {
-  return ctx + 2 + NumAttr(ctx) + (DIM * (DIM + 1) / 2) * NumMat(ctx);
+  return ctx + 2 + NumAttr(ctx) + (DIM * DIM) * NumMat(ctx);
 }
 
 #endif  // PALACE_LIBCEED_COEFF_QF_H
diff --git a/palace/linalg/eps.hpp b/palace/linalg/eps.hpp
index a939a00da..911f7a2a0 100644
--- a/palace/linalg/eps.hpp
+++ b/palace/linalg/eps.hpp
@@ -58,14 +58,9 @@ class EigenvalueSolver
                             ScaleType type) = 0;
   virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                             const ComplexOperator &M, ScaleType type) = 0;
-  virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                            const ComplexOperator &MP, const ComplexOperator &P1,
-                            const ComplexOperator &P2, ScaleType type) = 0;
   virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                            const ComplexOperator &M, const ComplexOperator &MP,
-                            const ComplexOperator &P1, const ComplexOperator &P2,
+                            const ComplexOperator &M, const ComplexOperator &P,
                             ScaleType type) = 0;
-
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
   // case, the linear solver should be configured to compute the action of M⁻¹ (with no
diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index 2eebaee9b..f5eed679f 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -351,16 +351,8 @@ void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const Complex
   MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
 }
 
-void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                         const ComplexOperator &MP, const ComplexOperator &P1, const ComplexOperator &P2,
-                                         EigenvalueSolver::ScaleType type)
-{
-  MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
-}
-
 void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                         const ComplexOperator &M, const ComplexOperator &MP,
-                                         const ComplexOperator &P1, const ComplexOperator &P2,
+                                         const ComplexOperator &M, const ComplexOperator &P,
                                          EigenvalueSolver::ScaleType type)
 {
   MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!");
@@ -754,8 +746,8 @@ RG SlepcEPSSolverBase::GetRG() const
 SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print, const std::string &prefix)
   : SlepcEPSSolverBase(comm, print, prefix)
 {
-  opK = opM = opMP = opP1 = opP2 = nullptr;
-  normK = normM = normMP = normP1 = normP2 = 0.0;
+  opK = opM = opP = nullptr;
+  normK = normM = 0.0;
 }
 
 void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
@@ -814,17 +806,14 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
 }
 
 void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                  const ComplexOperator &MP, const ComplexOperator &P1, const ComplexOperator &P2,
-                                  EigenvalueSolver::ScaleType type)
+                                  const ComplexOperator &P, EigenvalueSolver::ScaleType type)
 {
   // Construct shell matrices for the scaled operators which define the generalized
   // eigenvalue problem.
   const bool first = (opK == nullptr);
   opK = &K;
   opM = &M;
-  opMP = &MP;
-  opP1 = &P1;
-  opP2 = &P2;
+  opP = &P;
 
   if (first)
   {
@@ -846,10 +835,6 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
   {
     normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
     normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
-    normMP = linalg::SpectralNorm(GetComm(), *opMP, opMP->IsReal());
-    normP1 = linalg::SpectralNorm(GetComm(), *opP1, opP1->IsReal());
-    normP2 = linalg::SpectralNorm(GetComm(), *opP2, opP2->IsReal());
-    Mpi::Print("normK, M, MP, P1, P2: {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}\n", normK, normM, normMP, normP1, normP2);
     MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!");
     if (normK > 0 && normM > 0.0)
     {
@@ -895,20 +880,9 @@ PetscReal SlepcEPSSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
 {
   // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ.
   opK->Mult(x, r);
-  if (opMP)
+  if (opP)
   {
-    Mpi::Print("EPS GetResNorm opMP\n");
-    opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
-  }
-  if (opP1)
-  {
-    Mpi::Print("EPS GetResNorm opP1\n");
-    opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
-  }
-  if (opP2)
-  {
-    Mpi::Print("EPS GetResNorm opP2\n");
-    opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
+    opP->AddMult(x, r, 1.0);
   }
   opM->AddMult(x, r, -l);
   return linalg::Norml2(GetComm(), r);
@@ -933,8 +907,8 @@ SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print,
                                            const std::string &prefix)
   : SlepcEPSSolverBase(comm, print, prefix)
 {
-  opK = opC = opM = opMP = opP1 = opP2 = nullptr;
-  normK = normC = normM = normMP = normP1 = normP2 = 0.0;
+  opK = opC = opM = opP = nullptr;
+  normK = normC = normM = 0.0;
 }
 
 void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
@@ -1000,8 +974,7 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO
 }
 
 void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                        const ComplexOperator &M, const ComplexOperator &MP,
-                                        const ComplexOperator &P1, const ComplexOperator &P2,
+                                        const ComplexOperator &M, const ComplexOperator &P,
                                         EigenvalueSolver::ScaleType type)
 {
   // Construct shell matrices for the scaled linearized operators which define the block 2x2
@@ -1010,9 +983,7 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO
   opK = &K;
   opC = &C;
   opM = &M;
-  opMP = &MP;
-  opP1 = &P1;
-  opP2 = &P2;
+  opP = &P;
 
   if (first)
   {
@@ -1035,10 +1006,6 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO
     normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
     normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal());
     normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
-    normMP = linalg::SpectralNorm(GetComm(), *opMP, opMP->IsReal());
-    normP1 = linalg::SpectralNorm(GetComm(), *opP1, opP1->IsReal());
-    normP2 = linalg::SpectralNorm(GetComm(), *opP2, opP2->IsReal());
-    Mpi::Print("normK, C, M, MP, P1, P2: {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}\n", normK, normC, normM, normMP, normP1, normP2);
     MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0,
                 "Invalid matrix norms for PEP scaling!");
     if (normK > 0 && normC > 0.0 && normM > 0.0)
@@ -1119,20 +1086,9 @@ PetscReal SlepcPEPLinearSolver::GetResidualNorm(PetscScalar l, const ComplexVect
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
-  if (opMP)
-  {
-    Mpi::Print("PEPLinear GetResNorm opMP\n");
-    opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
-  }
-  if (opP1)
-  {
-    Mpi::Print("PEPLinear GetResNorm opP1\n");
-    opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
-  }
-  if (opP2)
+  if (opP)
   {
-    Mpi::Print("PEPLinear GetResNorm opP2\n");
-    opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
+    opP->AddMult(x, r, 1.0);
   }
   opC->AddMult(x, r, l);
   opM->AddMult(x, r, l * l);
@@ -1395,8 +1351,8 @@ RG SlepcPEPSolverBase::GetRG() const
 SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print, const std::string &prefix)
   : SlepcPEPSolverBase(comm, print, prefix)
 {
-  opK = opC = opM = opMP = opP1 = opP2 = nullptr;
-  normK = normC = normM = normMP = normP1 = normP2 = 0.0;
+  opK = opC = opM = opP = nullptr;
+  normK = normC = normM = 0.0;
 }
 
 void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
@@ -1462,8 +1418,7 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
 }
 
 void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                  const ComplexOperator &M, const ComplexOperator &MP,
-                                  const ComplexOperator &P1, const ComplexOperator &P2,
+                                  const ComplexOperator &M, const ComplexOperator &P,
                                   EigenvalueSolver::ScaleType type)
 {
   // Construct shell matrices for the scaled operators which define the quadratic polynomial
@@ -1472,9 +1427,7 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
   opK = &K;
   opC = &C;
   opM = &M;
-  opMP = &MP;
-  opP1 = &P1;
-  opP2 = &P2;
+  opP = &P;
 
   if (first)
   {
@@ -1503,10 +1456,6 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
     normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal());
     normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal());
     normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal());
-    normMP = linalg::SpectralNorm(GetComm(), *opMP, opMP->IsReal());
-    normP1 = linalg::SpectralNorm(GetComm(), *opP1, opP1->IsReal());
-    normP2 = linalg::SpectralNorm(GetComm(), *opP2, opP2->IsReal());
-    Mpi::Print("normK, C, M, MP, P1, P2: {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}, {:.3e}\n", normK, normC, normM, normMP, normP1, normP2);
     MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0,
                 "Invalid matrix norms for PEP scaling!");
     if (normK > 0 && normC > 0.0 && normM > 0.0)
@@ -1551,20 +1500,9 @@ PetscReal SlepcPEPSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x,
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
-  if (opMP)
-  {
-    Mpi::Print("PEP GetResNorm opMP\n");
-    opMP->AddMult(x, r, std::complex<double>(1.0, 0.0));
-  }
-  if (opP1)
-  {
-    Mpi::Print("PEP GetResNorm opP1\n");
-    opP1->AddMult(x, r, std::complex<double>(0.0, 1.0));
-  }
-  if (opP2)
+  if (opP)
   {
-    Mpi::Print("PEP GetResNorm opP2\n");
-    opP2->AddMult(x, r, std::complex<double>(0.0, -1.0));
+    opP->AddMult(x, r, 1.0);
   }
   opC->AddMult(x, r, l);
   opM->AddMult(x, r, l * l);
@@ -1602,18 +1540,9 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y)
 
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
-  if (ctx->opMP)
+  if (ctx->opP)
   {
-    std::cerr << "EPS A0 opMP\n";
-    ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
-  }
-  if (ctx->opP1)
-  {
-    ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
-  }
-  if (ctx->opP2)
-  {
-    ctx->opP2->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, -1.0));
+    ctx->opP->AddMult(ctx->x1, ctx->y1, 1.0);
   }
   ctx->y1 *= ctx->delta;
   PetscCall(ToPetscVec(ctx->y1, y));
@@ -1691,26 +1620,14 @@ PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  std::cerr << "PEPLinear L0\n";
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   ctx->y1 = ctx->x2;
   ctx->opC->Mult(ctx->x2, ctx->y2);
   ctx->y2 *= ctx->gamma;
   ctx->opK->AddMult(ctx->x1, ctx->y2, std::complex<double>(1.0, 0.0));
-  if (ctx->opMP)
-  {
-    std::cerr << "PEPLinear L0 opMP\n";
-    ctx->opMP->AddMult(ctx->x1, ctx->y2, std::complex<double>(1.0, 0.0));
-  }
-  if (ctx->opP1)
-  {
-    std::cerr << "PEPLinear L0 opP1\n";
-    ctx->opP1->AddMult(ctx->x1, ctx->y2, std::complex<double>(0.0, 1.0));
-  }
-  if (ctx->opP2)
+  if (ctx->opP)
   {
-    std::cerr << "PEPLinear L0 opP2\n";
-    ctx->opP2->AddMult(ctx->x1, ctx->y2, std::complex<double>(0.0, -1.0));
+    ctx->opP->AddMult(ctx->x1, ctx->y2, 1.0);
   }
   ctx->y2 *= -ctx->delta;
   PetscCall(ToPetscVec(ctx->y1, ctx->y2, y));
@@ -1726,7 +1643,7 @@ PetscErrorCode __mat_apply_PEPLinear_L1(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  std::cerr << "PEPLinear L1\n";
+
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   ctx->y1 = ctx->x1;
   ctx->opM->Mult(ctx->x2, ctx->y2);
@@ -1742,7 +1659,7 @@ PetscErrorCode __mat_apply_PEPLinear_B(Mat A, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(MatShellGetContext(A, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!");
-  std::cerr << "PEPLinear B\n";
+
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real());
   ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag());
@@ -1767,7 +1684,7 @@ PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y)
   palace::slepc::SlepcPEPLinearSolver *ctx;
   PetscCall(PCShellGetContext(pc, (void **)&ctx));
   MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!");
-  std::cerr << "PEPLinear\n";
+
   PetscCall(FromPetscVec(x, ctx->x1, ctx->x2));
   if (!ctx->sinvert)
   {
@@ -1792,20 +1709,9 @@ PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y)
   {
     ctx->y1.AXPBY(-ctx->sigma / (ctx->delta * ctx->gamma), ctx->x2, 0.0);  // Temporarily
     ctx->opK->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
-    if (ctx->opMP)
-    {
-      std::cerr << "PEPLinear opMP\n";
-      ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
-    }
-    if (ctx->opP1)
-    {
-      std::cerr << "PEPLinear opP1\n";
-      ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
-    }
-    if (ctx->opP2)
+    if (ctx->opP)
     {
-      std::cerr << "PEPLinear opP2\n";
-      ctx->opP2->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, -1.0));
+      ctx->opP->AddMult(ctx->x1, ctx->y1, 1.0);
     }
     ctx->opInv->Mult(ctx->y1, ctx->y2);
     if (ctx->opProj)
@@ -1838,18 +1744,9 @@ PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y)
 
   PetscCall(FromPetscVec(x, ctx->x1));
   ctx->opK->Mult(ctx->x1, ctx->y1);
-  if (ctx->opMP)
-  {
-    std::cerr << "PEP A0 opMP\n";
-    ctx->opMP->AddMult(ctx->x1, ctx->y1, std::complex<double>(1.0, 0.0));
-  }
-  if (ctx->opP1)
-  {
-    ctx->opP1->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, 1.0));
-  }
-  if (ctx->opP2)
+  if (ctx->opP)
   {
-    ctx->opP2->AddMult(ctx->x1, ctx->y1, std::complex<double>(0.0, -1.0));
+    ctx->opP->AddMult(ctx->x1, ctx->y1, 1.0);
   }
   PetscCall(ToPetscVec(ctx->y1, y));
 
diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp
index e5fe6944f..487149a76 100644
--- a/palace/linalg/slepc.hpp
+++ b/palace/linalg/slepc.hpp
@@ -134,13 +134,9 @@ class SlepcEigenvalueSolver : public EigenvalueSolver
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
-  void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &MP, const ComplexOperator &P1,
-                    const ComplexOperator &P2, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &MP,
-                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
-
+                    const ComplexOperator &M, const ComplexOperator &P,
+                    ScaleType type) override;
   // For the linear generalized case, the linear solver should be configured to compute the
   // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic
   // case, the linear solver should be configured to compute the action of M⁻¹ (with no
@@ -263,11 +259,11 @@ class SlepcEPSSolver : public SlepcEPSSolverBase
   using SlepcEigenvalueSolver::sinvert;
 
   // References to matrices defining the generalized eigenvalue problem (not owned).
-  const ComplexOperator *opK, *opM, *opMP, *opP1, *opP2;
+  const ComplexOperator *opK, *opM, *opP;
 
 private:
   // Operator norms for scaling.
-  mutable PetscReal normK, normM, normMP, normP1, normP2;
+  mutable PetscReal normK, normM;
 
 protected:
   PetscReal GetResidualNorm(PetscScalar l, const ComplexVector &x,
@@ -282,8 +278,7 @@ class SlepcEPSSolver : public SlepcEPSSolverBase
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &MP ,const ComplexOperator &P1, const ComplexOperator &P2,
-                    ScaleType type) override;
+                    const ComplexOperator &P, ScaleType type) override;
 
   void SetBMat(const Operator &B) override;
 };
@@ -303,14 +298,14 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase
 
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM, *opMP, *opP1, *opP2;
+  const ComplexOperator *opK, *opC, *opM, *opP;
 
   // Workspace vectors for operator applications.
   mutable ComplexVector x2, y2;
 
 private:
   // Operator norms for scaling.
-  mutable PetscReal normK, normC, normM, normMP, normP1, normP2;
+  mutable PetscReal normK, normC, normM;
 
 protected:
   PetscReal GetResidualNorm(PetscScalar l, const ComplexVector &x,
@@ -325,9 +320,8 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &MP,
-                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
-
+                    const ComplexOperator &M, const ComplexOperator &P,
+                    ScaleType type) override;
   void SetBMat(const Operator &B) override;
 
   void SetInitialSpace(const ComplexVector &v) override;
@@ -405,11 +399,11 @@ class SlepcPEPSolver : public SlepcPEPSolverBase
 
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM, *opMP, *opP1, *opP2;
+  const ComplexOperator *opK, *opC, *opM, *opP;
 
 private:
   // Operator norms for scaling.
-  mutable PetscReal normK, normC, normM, normMP, normP1, normP2;
+  mutable PetscReal normK, normC, normM;
 
 protected:
   PetscReal GetResidualNorm(PetscScalar l, const ComplexVector &x,
@@ -424,9 +418,8 @@ class SlepcPEPSolver : public SlepcPEPSolverBase
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &MP,
-                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
-
+                    const ComplexOperator &M, const ComplexOperator &P,
+                    ScaleType type) override;
   void SetBMat(const Operator &B) override;
 };
 
diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index 7ea33e07b..450de1720 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -11,6 +11,7 @@ namespace palace
 template <>
 void MfemWrapperSolver<Operator>::SetOperator(const Operator &op)
 {
+  Mpi::Print("In MfemWrapperSolver SetOperator with Real operator\n");
   // Operator is always assembled as a HypreParMatrix.
   if (const auto *hA = dynamic_cast<const mfem::HypreParMatrix *>(&op))
   {
@@ -52,7 +53,22 @@ void MfemWrapperSolver<ComplexOperator>::SetOperator(const ComplexOperator &op)
   }
   if (hAr && hAi)
   {
-    A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
+    //A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
+    /**/
+    // A = [Ar, -Ai]
+    //     [Ai,  Ar]
+    mfem::Array2D<const mfem::HypreParMatrix *> blocks(2, 2);
+    mfem::Array2D<double> block_coeffs(2, 2);
+    blocks(0, 0) = hAr;
+    blocks(0, 1) = hAi;
+    blocks(1, 0) = hAi;
+    blocks(1, 1) = hAr;
+    block_coeffs(0, 0) = 1.0;
+    block_coeffs(0, 1) = -1.0;
+    block_coeffs(1, 0) = 1.0;
+    block_coeffs(1, 1) = 1.0;
+    A.reset(mfem::HypreParMatrixFromBlocks(blocks, &block_coeffs));
+    /**/
     if (PtAPr)
     {
       PtAPr->StealParallelAssemble();
@@ -101,13 +117,39 @@ template <>
 void MfemWrapperSolver<ComplexOperator>::Mult(const ComplexVector &x,
                                               ComplexVector &y) const
 {
-  mfem::Array<const Vector *> X(2);
-  mfem::Array<Vector *> Y(2);
-  X[0] = &x.Real();
-  X[1] = &x.Imag();
-  Y[0] = &y.Real();
-  Y[1] = &y.Imag();
-  pc->ArrayMult(X, Y);
+  if (pc->Height() == x.Size())
+  {
+    mfem::Array<const Vector *> X(2);
+    mfem::Array<Vector *> Y(2);
+    X[0] = &x.Real();
+    X[1] = &x.Imag();
+    Y[0] = &y.Real();
+    Y[1] = &y.Imag();
+    pc->ArrayMult(X, Y);
+  }
+  else
+  {
+    // Clean this up? Is there a better way than idx1, idx2 + SetSubVector?
+    mfem::Array<int> idx1(x.Size()), idx2(x.Size());
+    for (int i = 0; i < x.Size(); i++) //move to SetOperator if really needed
+    {
+      idx1[i] = i;
+      idx2[i] = i + x.Size();
+    }
+    Vector X(2 * x.Size()), Y(2 * y.Size()), yr, yi;
+    X.UseDevice(true);
+    Y.UseDevice(true);
+    yr.UseDevice(true);
+    yi.UseDevice(true);
+    X.SetSubVector(idx1, x.Real());
+    X.SetSubVector(idx2, x.Imag());
+    pc->Mult(X, Y);
+    Y.ReadWrite();
+    yr.MakeRef(Y, 0, y.Size());
+    yi.MakeRef(Y, y.Size(), y.Size());
+    y.Real() = yr;
+    y.Imag() = yi;
+  }
 }
 
 }  // namespace palace
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index d4c3e03be..3e24ea2cd 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -42,15 +42,14 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   // [k x] = | 0  -k3  k2|
   //         | k3  0  -k1|
   //         |-k2  k1  0 |
-  wave_vector_cross.SetSize(3); // assumes 3D?
+  wave_vector_cross.SetSize(3); // assumes 3D
+  wave_vector_cross = 0.0;
   wave_vector_cross(0,1) = -wave_vector[2];
   wave_vector_cross(0,2) = wave_vector[1];
   wave_vector_cross(1,0) = wave_vector[2];
   wave_vector_cross(1,2) = -wave_vector[0];
   wave_vector_cross(2,0) = -wave_vector[1];
   wave_vector_cross(2,1) = wave_vector[0];
-  //Mpi::Print("Wave vector cross product\n");
-  //wave_vector_cross.Print();
 }
 
 mfem::Array<int>
@@ -141,7 +140,6 @@ void PeriodicBoundaryOperator::AddRealMassCoefficients(double coeff,
     mfem::DenseTensor kxTmuinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
     kxTmuinvkx = linalg::Mult(kxT, kxTmuinvkx);
     MaterialPropertyCoefficient kxTmuinvkx_func(mat_op.GetAttributeToMaterial(), kxTmuinvkx);
-    //muinvkx_func.RestrictCoefficient
     f.AddCoefficient(kxTmuinvkx_func.GetAttributeToMaterial(),
                      kxTmuinvkx_func.GetMaterialProperties(), coeff);
   }
@@ -162,7 +160,6 @@ void PeriodicBoundaryOperator::AddWeakCurlCoefficients(double coeff,
     }
     mfem::DenseTensor muinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
     MaterialPropertyCoefficient muinvkx_func(mat_op.GetAttributeToMaterial(), muinvkx);
-    //muinvkx_func.RestrictCoefficient
     f.AddCoefficient(muinvkx_func.GetAttributeToMaterial(),
                      muinvkx_func.GetMaterialProperties(), coeff);
   }
@@ -180,14 +177,12 @@ void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
     for (int k = 0; k < kxT.SizeK(); k++)
     {
       kxT(k).Transpose(wave_vector_cross);
-      //kxT(k) = wave_vector_cross;
     }
     mfem::DenseTensor kxTmuinv = linalg::Mult(kxT, mat_op.GetInvPermeability());
-    //mfem::DenseTensor kxTmuinv = linalg::Mult(mat_op.GetInvPermeability(), kxT);
     MaterialPropertyCoefficient kxTmuinv_func(mat_op.GetAttributeToMaterial(), kxTmuinv);
-    //muinvkx_func.RestrictCoefficient
     f.AddCoefficient(kxTmuinv_func.GetAttributeToMaterial(),
                      kxTmuinv_func.GetMaterialProperties(), coeff);
   }
 }
+
 }  // namespace palace
diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp
index 125cada6e..889b83314 100644
--- a/palace/models/romoperator.cpp
+++ b/palace/models/romoperator.cpp
@@ -196,9 +196,7 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &space_op, int max_
   K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  MP = space_op.GetPeriodicMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  P1 = space_op.GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  P2 = space_op.GetPeriodicCurlMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  PF = space_op.GetPeriodicMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   MFEM_VERIFY(K && M, "Invalid empty HDM matrices when constructing PROM!");
 
   // Set up RHS vector (linear in frequency part) for the incident field at port boundaries,
@@ -249,10 +247,10 @@ void RomOperator::SolveHDM(double omega, ComplexVector &u)
   A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega, Operator::DIAG_ZERO);
   has_A2 = (A2 != nullptr);
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega,
-                                    std::complex<double>(-omega * omega, 0.0), std::complex<double>(1.0, 0.0), 1.0i, -1.0i, K.get(),
-                                    C.get(), M.get(), A2.get(), MP.get(), P1.get(), P2.get());
+                                    std::complex<double>(-omega * omega, 0.0), K.get(),
+                                    C.get(), M.get(), A2.get(), PF.get());
   auto P =
-      space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega, omega, 1.0, 1.0, -1.0);
+      space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega, omega);
   ksp->SetOperators(*A, *P);
 
   // The HDM excitation vector is computed as RHS = iω RHS1 + RHS2(ω).
@@ -317,20 +315,10 @@ void RomOperator::UpdatePROM(double omega, const ComplexVector &u)
   }
   Mr.conservativeResize(dim_V, dim_V);
   ProjectMatInternal(comm, V, *M, Mr, r, dim_V0);
-  if (MP)
+  if (PF)
   {
-    MPr.conservativeResize(dim_V, dim_V);
-    ProjectMatInternal(comm, V, *MP, MPr, r, dim_V0);
-  }
-  if (P1)
-  {
-    P1r.conservativeResize(dim_V, dim_V);
-    ProjectMatInternal(comm, V, *P1, P1r, r, dim_V0);
-  }
-  if (P2)
-  {
-    P2r.conservativeResize(dim_V, dim_V);
-    ProjectMatInternal(comm, V, *P2, P2r, r, dim_V0);
+    PFr.conservativeResize(dim_V, dim_V);
+    ProjectMatInternal(comm, V, *PF, PFr, r, dim_V0);
   }
   Ar.resize(dim_V, dim_V);
   if (RHS1.Size())
@@ -388,17 +376,9 @@ void RomOperator::SolvePROM(double omega, ComplexVector &u)
     Ar += (1i * omega) * Cr;
   }
   Ar += (-omega * omega) * Mr;
-  if (MP)
-  {
-    Ar += MPr;
-  }
-  if (P1)
-  {
-    Ar += 1i * P1r;
-  }
-  if (P2)
+  if (PF)
   {
-    Ar -= 1i * P2r;
+    Ar += PFr;
   }
   if (has_RHS2)
   {
diff --git a/palace/models/romoperator.hpp b/palace/models/romoperator.hpp
index 8cef1e5e6..58894e22f 100644
--- a/palace/models/romoperator.hpp
+++ b/palace/models/romoperator.hpp
@@ -29,7 +29,7 @@ class RomOperator
   SpaceOperator &space_op;
 
   // HDM system matrices and excitation RHS.
-  std::unique_ptr<ComplexOperator> K, M, C, A2, MP, P1, P2;
+  std::unique_ptr<ComplexOperator> K, M, C, A2, PF;
   ComplexVector RHS1, RHS2, r;
   bool has_A2, has_RHS1, has_RHS2;
 
@@ -37,7 +37,7 @@ class RomOperator
   std::unique_ptr<ComplexKspSolver> ksp;
 
   // PROM matrices and vectors.
-  Eigen::MatrixXcd Kr, Mr, Cr, Ar, MPr, P1r, P2r;
+  Eigen::MatrixXcd Kr, Mr, Cr, Ar, PFr;
   Eigen::VectorXcd RHS1r, RHSr;
 
   // PROM reduced-order basis (real-valued) and active dimension.
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 775c4ea0d..f0cbb77ae 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -467,132 +467,44 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
 
 template <typename OperType>
 std::unique_ptr<OperType>
-SpaceOperator::GetPeriodicMassMatrix(Operator::DiagonalPolicy diag_policy)
+SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fpm(mat_op.MaxCeedAttribute());
+  MaterialPropertyCoefficient fpm(mat_op.MaxCeedAttribute()),
+      fpwc(mat_op.MaxCeedAttribute()), fpc(mat_op.MaxCeedAttribute());
   periodic_op.AddRealMassCoefficients(1.0, fpm);
-  int empty = (fpm.empty());
-  Mpi::GlobalMin(1, &empty, GetComm());
-  if (empty)
-  {
-    return {};
-  }
-  constexpr bool skip_zeros = false;
-  auto m = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpm, nullptr, nullptr, skip_zeros);
-  if constexpr (std::is_same<OperType, ComplexOperator>::value)
-  {
-    auto M = std::make_unique<ComplexParOperator>(std::move(m), nullptr, GetNDSpace());
-    M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return M;
-  }
-  else
-  {
-    auto M = std::make_unique<ParOperator>(std::move(m), GetNDSpace());
-    M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return M;
-  }
-}
-
-template <typename OperType>
-std::unique_ptr<OperType>
-SpaceOperator::GetPeriodicWeakCurlMatrix(Operator::DiagonalPolicy diag_policy)
-{
-  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fpw(mat_op.MaxCeedAttribute());
-  periodic_op.AddWeakCurlCoefficients(1.0, fpw);
-  int empty = (fpw.empty());
-  Mpi::GlobalMin(1, &empty, GetComm());
-  if (empty)
-  {
-    return {};
-  }
-  constexpr bool skip_zeros = false;
-  //constexpr bool assemble_q_data = false;
-  //BilinearForm a(GetNDSpace(), GetNDSpace()); //? which spaces and what order
-  //BilinearForm a(GetNDSpace());//test
-  //a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(f);
-  //if (assemble_q_data)
-  //{
-  //  a.AssembleQuadratureData();
-  //}
-  //auto weakCurl = a.Assemble(skip_zeros);
-  auto weakCurl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fpw, nullptr, skip_zeros);
-
-  if constexpr (std::is_same<OperType, ComplexOperator>::value)
-  {
-    auto WeakCurl = std::make_unique<ComplexParOperator>(std::move(weakCurl), nullptr, GetNDSpace());
-    WeakCurl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return WeakCurl;
-  }
-  else
-  {
-    auto WeakCurl = std::make_unique<ParOperator>(std::move(weakCurl), GetNDSpace());
-    WeakCurl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return WeakCurl;
-  }
-/*
-  if constexpr (std::is_same<OperType, ComplexOperator>::value)
-  {
-    auto WeakCurl = std::make_unique<ComplexParOperator>(std::move(weakCurl),nullptr, GetNDSpace(), GetNDSpace(),false);
-    return WeakCurl;
-  }
-  else
-  {
-    auto WeakCurl = std::make_unique<ParOperator>(std::move(weakCurl),GetNDSpace(), GetNDSpace(), false);
-    return WeakCurl;
-  }
-  */
-}
-
-template <typename OperType>
-std::unique_ptr<OperType>
-SpaceOperator::GetPeriodicCurlMatrix(Operator::DiagonalPolicy diag_policy)
-{
-  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fp(mat_op.MaxCeedAttribute());
-  periodic_op.AddCurlCoefficients(1.0, fp);
-  int empty = (fp.empty());
-  Mpi::GlobalMin(1, &empty, GetComm());
-  if (empty)
+  periodic_op.AddWeakCurlCoefficients(1.0, fpwc);
+  periodic_op.AddCurlCoefficients(-1.0, fpc);
+  int empty[2] = {(fpm.empty()), (fpwc.empty() && fpc.empty())};
+  Mpi::GlobalMin(2, empty, GetComm());
+  if (empty[0] && empty[1])
   {
     return {};
   }
   constexpr bool skip_zeros = false;
-  //constexpr bool assemble_q_data = false;
-  //BilinearForm a(GetNDSpace(), GetNDSpace()); //? which spaces and what order?
-  //BilinearForm a(GetNDSpace());//test
-  //a.AddDomainIntegrator<MixedVectorCurlIntegrator>(f);
-  //if (assemble_q_data)
-  //{
-  //  a.AssembleQuadratureData();
-  //}
-  //auto curl = a.Assemble(skip_zeros);
-  auto curl = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &fp, skip_zeros);
-  if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  std::unique_ptr<Operator> pr, pi;
+  if (!empty[0])
   {
-    auto Curl = std::make_unique<ComplexParOperator>(std::move(curl), nullptr, GetNDSpace());
-    Curl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return Curl;
+    pr = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpm, nullptr, nullptr, skip_zeros);
   }
-  else
+  if (!empty[1])
   {
-    auto Curl = std::make_unique<ParOperator>(std::move(curl), GetNDSpace());
-    Curl->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return Curl;
+    pi = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fpwc, &fpc, skip_zeros);
   }
-  /*
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
-    auto Curl = std::make_unique<ComplexParOperator>(std::move(curl),nullptr, GetNDSpace(), GetNDSpace(),false);
-    return Curl;
+    auto P =
+        std::make_unique<ComplexParOperator>(std::move(pr), std::move(pi), GetNDSpace());
+    P->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return P;
   }
   else
   {
-    auto Curl = std::make_unique<ParOperator>(std::move(curl),GetNDSpace(), GetNDSpace(), false);
-    return Curl;
+    MFEM_VERIFY(!pi, "Unexpected imaginary part in GetPeriodicMatrix<Operator>!");
+    auto P = std::make_unique<ParOperator>(std::move(pr), GetNDSpace());
+    P->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return P;
   }
-  */
 }
 
 namespace
@@ -600,8 +512,8 @@ namespace
 
 auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
                          const ParOperator *K, const ParOperator *C, const ParOperator *M,
-                         const ParOperator *A2, double a4, double a5, double a6, const ParOperator *MP,
-                         const ParOperator *P1, const ParOperator *P2, const FiniteElementSpace &fespace)
+                         const ParOperator *A2, const ParOperator *P,
+                        const FiniteElementSpace &fespace)
 {
   auto sum = std::make_unique<SumOperator>(h, w);
   if (K && a0 != 0.0)
@@ -620,17 +532,9 @@ auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
   {
     sum->AddOperator(A2->LocalOperator(), 1.0);
   }
-  if (MP && a4 != 0.0)
-  {
-    sum->AddOperator(MP->LocalOperator(), a4);
-  }
-  if (P1 && a5 != 0.0)
-  {
-    sum->AddOperator(P1->LocalOperator(), a5);
-  }
-  if (P2 && a6 != 0.0)
+  if (P)
   {
-    sum->AddOperator(P2->LocalOperator(), a6);
+    sum->AddOperator(P->LocalOperator(), 1.0);
   }
   return std::make_unique<ParOperator>(std::move(sum), fespace);
 }
@@ -638,9 +542,8 @@ auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
 auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<double> a1,
                          std::complex<double> a2, const ComplexParOperator *K,
                          const ComplexParOperator *C, const ComplexParOperator *M,
-                         const ComplexParOperator *A2, std::complex<double> a4,
-                         std::complex<double> a5, std::complex<double> a6, const ComplexParOperator *MP,
-                         const ComplexParOperator *P1, const ComplexParOperator *P2, const FiniteElementSpace &fespace)
+                         const ComplexParOperator *A2, const ComplexParOperator *P,
+                        const FiniteElementSpace &fespace)
 {
   // Block 2 x 2 equivalent-real formulation for each term in the sum:
   //                    [ sumr ]  +=  [ ar  -ai ] [ Ar ]
@@ -733,79 +636,15 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
       sumi->AddOperator(*A2->LocalOperator().Imag(), 1.0);
     }
   }
-  if (MP && a4 != 0.0)
-  {
-    if (a4.real() != 0.0)
-    {
-      if (MP->LocalOperator().Real())
-      {
-        sumr->AddOperator(*MP->LocalOperator().Real(), a4.real());
-      }
-      if (MP->LocalOperator().Imag())
-      {
-        sumi->AddOperator(*MP->LocalOperator().Imag(), a4.real());
-      }
-    }
-    if (a4.imag() != 0.0)
-    {
-      if (MP->LocalOperator().Imag())
-      {
-        sumr->AddOperator(*MP->LocalOperator().Imag(), -a4.imag());
-      }
-      if (MP->LocalOperator().Real())
-      {
-        sumi->AddOperator(*MP->LocalOperator().Real(), a4.imag());
-      }
-    }
-  }
-  if (P1 && a5 != 0.0)
-  {
-    if (a5.real() != 0.0)
-    {
-      if (P1->LocalOperator().Real())
-      {
-        sumr->AddOperator(*P1->LocalOperator().Real(), a5.real());
-      }
-      if (P1->LocalOperator().Imag())
-      {
-        sumi->AddOperator(*P1->LocalOperator().Imag(), a5.real());
-      }
-    }
-    if (a5.imag() != 0.0)
-    {
-      if (P1->LocalOperator().Imag())
-      {
-        sumr->AddOperator(*P1->LocalOperator().Imag(), -a5.imag());
-      }
-      if (P1->LocalOperator().Real())
-      {
-        sumi->AddOperator(*P1->LocalOperator().Real(), a5.imag());
-      }
-    }
-  }
-  if (P2 && a6 != 0.0)
+  if (P)
   {
-    if (a6.real() != 0.0)
+    if (P->LocalOperator().Real())
     {
-      if (P2->LocalOperator().Real())
-      {
-        sumr->AddOperator(*P2->LocalOperator().Real(), a6.real());
-      }
-      if (P2->LocalOperator().Imag())
-      {
-        sumi->AddOperator(*P2->LocalOperator().Imag(), a6.real());
-      }
+      sumr->AddOperator(*P->LocalOperator().Real(), 1.0);
     }
-    if (a6.imag() != 0.0)
+    if (P->LocalOperator().Imag())
     {
-      if (P2->LocalOperator().Imag())
-      {
-        sumr->AddOperator(*P2->LocalOperator().Imag(), -a6.imag());
-      }
-      if (P2->LocalOperator().Real())
-      {
-        sumi->AddOperator(*P2->LocalOperator().Real(), a6.imag());
-      }
+      sumi->AddOperator(*P->LocalOperator().Imag(), 1.0);
     }
   }
   return std::make_unique<ComplexParOperator>(std::move(sumr), std::move(sumi), fespace);
@@ -816,9 +655,8 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
 template <typename OperType, typename ScalarType>
 std::unique_ptr<OperType>
 SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
-                               ScalarType a4, ScalarType a5, ScalarType a6,
                                const OperType *K, const OperType *C, const OperType *M,
-                               const OperType *A2, const OperType *MP, const OperType *P1, const OperType *P2)
+                               const OperType *A2, const OperType *P)
 {
   using ParOperType =
       typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
@@ -828,11 +666,9 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
   const auto *PtAP_C = (C) ? dynamic_cast<const ParOperType *>(C) : nullptr;
   const auto *PtAP_M = (M) ? dynamic_cast<const ParOperType *>(M) : nullptr;
   const auto *PtAP_A2 = (A2) ? dynamic_cast<const ParOperType *>(A2) : nullptr;
-  const auto *PtAP_MP = (MP) ? dynamic_cast<const ParOperType *>(MP) : nullptr;
-  const auto *PtAP_P1 = (P1) ? dynamic_cast<const ParOperType *>(P1) : nullptr;
-  const auto *PtAP_P2 = (P2) ? dynamic_cast<const ParOperType *>(P2) : nullptr;
+  const auto *PtAP_P = (P) ? dynamic_cast<const ParOperType *>(P) : nullptr;
   MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2)
-               && (!MP || PtAP_MP) && (!P1 || PtAP_P1) && (!P2 || PtAP_P2),
+               && (!P || PtAP_P),
               "SpaceOperator requires ParOperator or ComplexParOperator for system matrix "
               "construction!");
 
@@ -857,27 +693,16 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
     height = PtAP_A2->LocalOperator().Height();
     width = PtAP_A2->LocalOperator().Width();
   }
-  else if (PtAP_MP)
-  {
-    height = PtAP_MP->LocalOperator().Height();
-    width = PtAP_MP->LocalOperator().Width();
-  }
-  else if (PtAP_P1)
-  {
-    height = PtAP_P1->LocalOperator().Height();
-    width = PtAP_P1->LocalOperator().Width();
-  }
-  else if (PtAP_P2)
+  else if (PtAP_P)
   {
-    height = PtAP_P2->LocalOperator().Height();
-    width = PtAP_P2->LocalOperator().Width();
+    height = PtAP_P->LocalOperator().Height();
+    width = PtAP_P->LocalOperator().Width();
   }
   MFEM_VERIFY(height >= 0 && width >= 0,
               "At least one argument to GetSystemMatrix must not be empty!");
 
   auto A = BuildParSumOperator(height, width, a0, a1, a2, PtAP_K, PtAP_C, PtAP_M, PtAP_A2,
-                               a4, a5, a6, PtAP_MP, PtAP_P1, PtAP_P2,
-                               GetNDSpace());
+                               PtAP_P, GetNDSpace());
   A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE);
   return A;
 }
@@ -953,9 +778,9 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&br,
 
 template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, double a1,
-                                                                 double a2, double a3,
+                                                                 double a2, double a3)/*,
                                                                  double a4, double a5,
-                                                                 double a6)
+                                                                 double a6)*/
 {
   // XX TODO: Handle complex coeff a0/a1/a2/a3 (like GetSystemMatrix)
 
@@ -974,10 +799,8 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
   std::vector<std::unique_ptr<Operator>> br_vec(n_levels), bi_vec(n_levels),
       br_aux_vec(n_levels), bi_aux_vec(n_levels);
   constexpr bool skip_zeros = false, assemble_q_data = false;
-  Mpi::Print("GetPreconditioner pc_mat_real: {:d}, pc_mat_shifted: {:d}\n", pc_mat_real, pc_mat_shifted);
   if (std::is_same<OperType, ComplexOperator>::value && !pc_mat_real)
   {
-    Mpi::Print("GetPreconditioner Complex!\n");
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
         dfi(mat_op.MaxCeedAttribute()), fr(mat_op.MaxCeedAttribute()),
         fi(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
@@ -994,9 +817,9 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddImagMassCoefficients(a2, fi);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
-    periodic_op.AddRealMassCoefficients(a4, fmpr);
-    periodic_op.AddWeakCurlCoefficients(a5, fpwi);
-    periodic_op.AddCurlCoefficients(a6, fpi);
+    periodic_op.AddRealMassCoefficients(1.0, fmpr);
+    periodic_op.AddWeakCurlCoefficients(1.0, fpwi);
+    periodic_op.AddCurlCoefficients(-1.0, fpi);
     int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()
                      && fpwr.empty() && fpr.empty() && fmpr.empty()),
                     (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty()
@@ -1019,7 +842,6 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
   }
   else
   {
-    Mpi::Print("GetPreconditioner Real!\n");
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
         fr(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
         fbr(mat_op.MaxCeedBdrAttribute()), fpwr(mat_op.MaxCeedAttribute()),
@@ -1031,9 +853,9 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr);
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr);
-    periodic_op.AddRealMassCoefficients(a4, fmpr);
-    periodic_op.AddWeakCurlCoefficients(a5, fpwr);
-    periodic_op.AddCurlCoefficients(a6, fpr);
+    periodic_op.AddRealMassCoefficients(1.0, fmpr);
+    periodic_op.AddWeakCurlCoefficients(1.0, fpwr);
+    periodic_op.AddCurlCoefficients(-1.0, fpr);
     int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
                  fmpr.empty() && fpwr.empty() && fpr.empty());
     Mpi::GlobalMin(1, &empty, GetComm());
@@ -1317,34 +1139,23 @@ template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetExtraSystemMatrix(double, Operator::DiagonalPolicy);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, double, double, double, const Operator *,
+    SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy);
+template std::unique_ptr<ComplexOperator>
+    SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy);
+
+template std::unique_ptr<Operator>
+SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, const Operator *,
                                                  const Operator *, const Operator *, const Operator *,
-                                                 const Operator *, const Operator *, const Operator *);
+                                                 const Operator *);
 template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetSystemMatrix<ComplexOperator, std::complex<double>>(
-    std::complex<double>, std::complex<double>, std::complex<double>,
     std::complex<double>, std::complex<double>, std::complex<double>,
     const ComplexOperator *, const ComplexOperator *, const ComplexOperator *, const ComplexOperator *,
-    const ComplexOperator *, const ComplexOperator *, const ComplexOperator *);
-
-template std::unique_ptr<Operator>
-SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double, double, double, double);
-template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double, double, double, double);
-
-template std::unique_ptr<Operator>
-SpaceOperator::GetPeriodicWeakCurlMatrix<Operator>(Operator::DiagonalPolicy);
-template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPeriodicWeakCurlMatrix<ComplexOperator>(Operator::DiagonalPolicy);
-
-template std::unique_ptr<Operator>
-SpaceOperator::GetPeriodicCurlMatrix<Operator>(Operator::DiagonalPolicy);
-template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPeriodicCurlMatrix<ComplexOperator>(Operator::DiagonalPolicy);
+    const ComplexOperator *);
 
 template std::unique_ptr<Operator>
-SpaceOperator::GetPeriodicMassMatrix<Operator>(Operator::DiagonalPolicy);
+SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double);
 template std::unique_ptr<ComplexOperator>
-SpaceOperator::GetPeriodicMassMatrix<ComplexOperator>(Operator::DiagonalPolicy);
+SpaceOperator::GetPreconditionerMatrix<ComplexOperator>(double, double, double, double);
 
 }  // namespace palace
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 6bec1e33d..a0f117cb9 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -138,7 +138,7 @@ class SpaceOperator
   auto GlobalTrueVSize() const { return GetNDSpace().GlobalTrueVSize(); }
 
   // Construct any part of the frequency-dependent complex linear system matrix:
-  //                     A = K + iω C - ω² (Mr + i Mi) + A2(ω) + i P1 - i P2.
+  //                     A = K + iω C - ω² (Mr + i Mi) + A2(ω) + P.
   // For time domain problems, any one of K, C, or M = Mr can be constructed. The argument
   // ω is required only for the constructing the "extra" matrix A2(ω).
   template <typename OperType>
@@ -151,22 +151,17 @@ class SpaceOperator
   std::unique_ptr<OperType> GetExtraSystemMatrix(double omega,
                                                  Operator::DiagonalPolicy diag_policy);
   template <typename OperType>
-  std::unique_ptr<OperType> GetPeriodicMassMatrix(Operator::DiagonalPolicy diag_policy);
-  template <typename OperType>
-  std::unique_ptr<OperType> GetPeriodicWeakCurlMatrix(Operator::DiagonalPolicy diag_policy);
-  template <typename OperType>
-  std::unique_ptr<OperType> GetPeriodicCurlMatrix(Operator::DiagonalPolicy diag_policy);
+  std::unique_ptr<OperType> GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy);
 
   // Construct the complete frequency or time domain system matrix using the provided
   // stiffness, damping, mass, and extra matrices:
-  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 + a4 MP + a5 P1 + a6 P2.
+  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 + P.
   // It is assumed that the inputs have been constructed using previous calls to
   // GetSystemMatrix() and the returned operator does not inherit ownership of any of them.
   template <typename OperType, typename ScalarType>
   std::unique_ptr<OperType>
-  GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, ScalarType a4, ScalarType a5, ScalarType a6, const OperType *K,
-                  const OperType *C, const OperType *M, const OperType *A2 = nullptr,
-                  const OperType *MP = nullptr, const OperType *P1 = nullptr, const OperType *P2 = nullptr);
+  GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, const OperType *K,
+                  const OperType *C, const OperType *M, const OperType *A2 = nullptr, const OperType *P = nullptr);
 
   // Construct the real, SPD matrix for weighted L2 or H(curl) inner products:
   //                           B = a0 Kr + a2 Mr .
@@ -181,10 +176,10 @@ class SpaceOperator
   // Construct the matrix for frequency or time domain linear system preconditioning. If it
   // is real-valued (Mr > 0, Mi < 0, |Mr + Mi| is done on the material property coefficient,
   // not the matrix entries themselves):
-  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) + a4 MP + a5 P1 + a6 P2.
+  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) + P.
   template <typename OperType>
   std::unique_ptr<OperType> GetPreconditionerMatrix(double a0, double a1, double a2,
-                                                    double a3, double a4=0, double a5=0, double a6=0);
+                                                    double a3);
 
   // Construct and return the discrete curl or gradient matrices.
   const Operator &GetGradMatrix() const
diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp
index e7b6cb22b..b5f90da2b 100644
--- a/palace/models/timeoperator.cpp
+++ b/palace/models/timeoperator.cpp
@@ -83,8 +83,8 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera
       {
         // Configure the system matrix and also the matrix (matrices) from which the
         // preconditioner will be constructed.
-        A = space_op.GetSystemMatrix(a0, a1, 1.0, 0.0, 0.0, 0.0, K.get(), C.get(), M.get());
-        B = space_op.GetPreconditionerMatrix<Operator>(a0, a1, 1.0, 0.0, 0.0, 0.0);
+        A = space_op.GetSystemMatrix(a0, a1, 1.0, K.get(), C.get(), M.get());
+        B = space_op.GetPreconditionerMatrix<Operator>(a0, a1, 1.0, 0.0);
 
         // Configure the solver.
         if (!kspA)
diff --git a/palace/utils/iodata.cpp b/palace/utils/iodata.cpp
index 1c03c64f2..e17b20820 100644
--- a/palace/utils/iodata.cpp
+++ b/palace/utils/iodata.cpp
@@ -530,8 +530,9 @@ void IoData::NondimensionalizeInputs(mfem::ParMesh &mesh)
   for (int i = 0; i < boundaries.floquet.wave_vector.size(); i++)
   {
     Mpi::Print("Rescaling floquet component from {:.3e}", boundaries.floquet.wave_vector[i]);
-    boundaries.floquet.wave_vector[i] /= 1.0 / Lc;
+    boundaries.floquet.wave_vector[i] /= 1.0 / GetMeshLengthScale();//Lc;
     Mpi::Print("to {:.3e}\n", boundaries.floquet.wave_vector[i]);
+    Mpi::Print("Lc: {:.3e}, L0: {:.3e}\n", Lc, model.L0);
   }
 
   // Wave port offset distance.

From 975a2748d92b59c769ba99b1e87ce55d6c2f3cb7 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 25 Nov 2024 14:20:34 -0800
Subject: [PATCH 13/49] Merge Floquet periodic terms into one operator

---
 palace/linalg/arpack.cpp | 80 ++++++++--------------------------------
 palace/linalg/arpack.hpp | 18 +++------
 2 files changed, 21 insertions(+), 77 deletions(-)

diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp
index ee479be4d..260f08f9a 100644
--- a/palace/linalg/arpack.cpp
+++ b/palace/linalg/arpack.cpp
@@ -201,22 +201,10 @@ void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
   MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!");
 }
 
-void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
-                                          const ComplexOperator &M,
-                                          const ComplexOperator &MP,
-                                          const ComplexOperator &P1,
-                                          const ComplexOperator &P2,
-                                          EigenvalueSolver::ScaleType type)
-{
-  MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!");
-}
-
 void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K,
                                           const ComplexOperator &C,
                                           const ComplexOperator &M,
-                                          const ComplexOperator &MP,
-                                          const ComplexOperator &P1,
-                                          const ComplexOperator &P2,
+                                          const ComplexOperator &P,
                                           EigenvalueSolver::ScaleType type)
 {
   MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!");
@@ -511,7 +499,7 @@ void ArpackEigenvalueSolver::RescaleEigenvectors(int num_eig)
 ArpackEPSSolver::ArpackEPSSolver(MPI_Comm comm, int print)
   : ArpackEigenvalueSolver(comm, print)
 {
-  opK = opM = opMP = opP1 = opP2 = nullptr;
+  opK = opM = opP = nullptr;
   normK = normM = 0.0;
 }
 
@@ -545,16 +533,13 @@ void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
 }
 
 void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                   const ComplexOperator &MP, const ComplexOperator &P1, const ComplexOperator &P2,
-                                   EigenvalueSolver::ScaleType type)
+                                   const ComplexOperator &P, EigenvalueSolver::ScaleType type)
 {
   MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
   bool first = (opK == nullptr);
   opK = &K;
   opM = &M;
-  opMP = &MP;
-  opP1 = &P1;
-  opP2 = &P2;
+  opP = &P;
   if (first && type != ScaleType::NONE)
   {
     normK = linalg::SpectralNorm(comm, *opK, opK->IsReal());
@@ -638,17 +623,9 @@ void ArpackEPSSolver::ApplyOp(const std::complex<double> *px,
   if (!sinvert)
   {
     opK->Mult(x1, z1);
-    if (opMP)
-    {
-      opMP->AddMult(x1, z1, 1.0);
-    }
-    if (opP1)
-    {
-      opP1->AddMult(x1, z1, 1.0i);
-    }
-    if (opP2)
+    if (opP)
     {
-      opP2->AddMult(x1, z1, -1.0i);
+      opP->AddMult(x1, z1, 1.0);
     }
     opInv->Mult(z1, y1);
     y1 *= 1.0 / gamma;
@@ -684,17 +661,9 @@ double ArpackEPSSolver::GetResidualNorm(std::complex<double> l, const ComplexVec
 {
   // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ.
   opK->Mult(x, r);
-  if (opMP)
+  if (opP)
   {
-    opMP->AddMult(x, r, 1.0);
-  }
-  if (opP1)
-  {
-    opP1->AddMult(x, r, 1.0i);
-  }
-  if (opP2)
-  {
-    opP2->AddMult(x, r, -1.0i);
+    opP->AddMult(x, r, 1.0);
   }
   opM->AddMult(x, r, -l);
   return linalg::Norml2(comm, r);
@@ -720,7 +689,7 @@ double ArpackEPSSolver::GetBackwardScaling(std::complex<double> l) const
 ArpackPEPSolver::ArpackPEPSolver(MPI_Comm comm, int print)
   : ArpackEigenvalueSolver(comm, print)
 {
-  opK = opC = opM = opMP = opP1 = opP2 = nullptr;
+  opK = opC = opM = opP = nullptr;
   normK = normC = normM = 0.0;
 }
 
@@ -762,8 +731,7 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
 }
 
 void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                                   const ComplexOperator &M, const ComplexOperator &MP,
-                                   const ComplexOperator &P1, const ComplexOperator &P2,
+                                   const ComplexOperator &M, const ComplexOperator &P,
                                    EigenvalueSolver::ScaleType type)
 {
   MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
@@ -771,9 +739,7 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
   opK = &K;
   opC = &C;
   opM = &M;
-  opMP = &MP;
-  opP1 = &P1;
-  opP2 = &P2;
+  opP = &P;
   if (first && type != ScaleType::NONE)
   {
     normK = linalg::SpectralNorm(comm, *opK, opK->IsReal());
@@ -886,17 +852,9 @@ void ArpackPEPSolver::ApplyOp(const std::complex<double> *px,
     }
 
     opK->Mult(x1, z1);
-    if (opMP)
-    {
-      opMP->AddMult(x1, z1, 1.0);
-    }
-    if (opP1)
+    if (opP)
     {
-      opP1->AddMult(x1, z1, 1.0i);
-    }
-    if (opP2)
-    {
-      opP2->AddMult(x1, z1, -1.0i);
+      opP->AddMult(x1, z1, 1.0);
     }
     opC->AddMult(x2, z1, std::complex<double>(gamma, 0.0));
     opInv->Mult(z1, y2);
@@ -956,17 +914,9 @@ double ArpackPEPSolver::GetResidualNorm(std::complex<double> l, const ComplexVec
   // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for
   // eigenvalue λ.
   opK->Mult(x, r);
-  if (opMP)
-  {
-    opMP->AddMult(x, r, 1.0);
-  }
-  if (opP1)
-  {
-    opP1->AddMult(x, r, 1.0i);
-  }
-  if (opP2)
+  if (opP)
   {
-    opP2->AddMult(x, r, -1.0i);
+    opP->AddMult(x, r, 1.0);
   }
   opC->AddMult(x, r, l);
   opM->AddMult(x, r, l * l);
diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp
index 0e058a4e3..821531264 100644
--- a/palace/linalg/arpack.hpp
+++ b/palace/linalg/arpack.hpp
@@ -120,13 +120,8 @@ class ArpackEigenvalueSolver : public EigenvalueSolver
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
-  void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &MP, const ComplexOperator &P1,
-                    const ComplexOperator &P2,
-                    ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &MP,
-                    const ComplexOperator &P1, const ComplexOperator &P2,
+                    const ComplexOperator &M, const ComplexOperator &P,
                     ScaleType type) override;
 
   // For the linear generalized case, the linear solver should be configured to compute the
@@ -189,7 +184,7 @@ class ArpackEPSSolver : public ArpackEigenvalueSolver
 {
 private:
   // References to matrices defining the generalized eigenvalue problem (not owned).
-  const ComplexOperator *opK, *opM, *opMP, *opP1, *opP2;
+  const ComplexOperator *opK, *opM, *opP;
 
   // Operator norms for scaling.
   mutable double normK, normM;
@@ -212,8 +207,7 @@ class ArpackEPSSolver : public ArpackEigenvalueSolver
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
                     ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                    const ComplexOperator &MP, const ComplexOperator &P1,
-                    const ComplexOperator &P2, ScaleType type) override;
+                    const ComplexOperator &P, ScaleType type) override;
 
   int Solve() override;
 };
@@ -224,7 +218,7 @@ class ArpackPEPSolver : public ArpackEigenvalueSolver
 private:
   // References to matrices defining the quadratic polynomial eigenvalue problem
   // (not owned).
-  const ComplexOperator *opK, *opC, *opM, *opMP, *opP1, *opP2;
+  const ComplexOperator *opK, *opC, *opM, *opP;
 
   // Operator norms for scaling.
   mutable double normK, normC, normM;
@@ -250,8 +244,8 @@ class ArpackPEPSolver : public ArpackEigenvalueSolver
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
                     const ComplexOperator &M, ScaleType type) override;
   void SetOperators(const ComplexOperator &K, const ComplexOperator &C,
-                    const ComplexOperator &M, const ComplexOperator &MP,
-                    const ComplexOperator &P1, const ComplexOperator &P2, ScaleType type) override;
+                    const ComplexOperator &M, const ComplexOperator &P,
+                    ScaleType type) override;
 
   int Solve() override;
 };

From 57e6d3acbee48c3a672e047b015f9d8afd3c7476 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 25 Nov 2024 14:20:45 -0800
Subject: [PATCH 14/49] Remove print

---
 palace/utils/iodata.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/palace/utils/iodata.cpp b/palace/utils/iodata.cpp
index e17b20820..9260919bd 100644
--- a/palace/utils/iodata.cpp
+++ b/palace/utils/iodata.cpp
@@ -529,10 +529,7 @@ void IoData::NondimensionalizeInputs(mfem::ParMesh &mesh)
   // Floquet periodic boundaries
   for (int i = 0; i < boundaries.floquet.wave_vector.size(); i++)
   {
-    Mpi::Print("Rescaling floquet component from {:.3e}", boundaries.floquet.wave_vector[i]);
-    boundaries.floquet.wave_vector[i] /= 1.0 / GetMeshLengthScale();//Lc;
-    Mpi::Print("to {:.3e}\n", boundaries.floquet.wave_vector[i]);
-    Mpi::Print("Lc: {:.3e}, L0: {:.3e}\n", Lc, model.L0);
+    boundaries.floquet.wave_vector[i] /= 1.0 / GetMeshLengthScale();
   }
 
   // Wave port offset distance.

From c45ef21e5e8fc112daedc8fb1d01f68f11c99266 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 25 Nov 2024 14:39:56 -0800
Subject: [PATCH 15/49] Use CEED Symmetric operator

---
 palace/fem/libceed/operator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index 3b9a98af2..fb5fc91ca 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -530,7 +530,7 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
   };
 
   // Initialize the coarse operator.
-  auto op_coarse = std::make_unique<Operator>(fespace_coarse.GetVSize(),
+  auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
                                               fespace_coarse.GetVSize());
 
   // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry

From 5a4f614df360e7e24d730be9fa52b72324cf5dc9 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 25 Nov 2024 16:24:28 -0800
Subject: [PATCH 16/49] Disable divergence-free projection when using Floquet
 BCs

---
 palace/drivers/eigensolver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index edb26867b..44fd8f5d6 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -172,7 +172,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // Construct a divergence-free projector so the eigenvalue solve is performed in the space
   // orthogonal to the zero eigenvalues of the stiffness matrix.
   std::unique_ptr<DivFreeSolver<ComplexVector>> divfree;
-  if (iodata.solver.linear.divfree_max_it > 0)
+  if (iodata.solver.linear.divfree_max_it > 0 and !PF)
   {
     Mpi::Print(" Configuring divergence-free projection\n");
     constexpr int divfree_verbose = 0;

From 447df808ea0e0a85ed72f6bc861e0588108f1ef1 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 25 Nov 2024 16:25:00 -0800
Subject: [PATCH 17/49] Define vector indices in SetOperator

---
 palace/linalg/solver.cpp | 15 ++++++++-------
 palace/linalg/solver.hpp |  3 +++
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index 450de1720..06c74322c 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -68,6 +68,13 @@ void MfemWrapperSolver<ComplexOperator>::SetOperator(const ComplexOperator &op)
     block_coeffs(1, 0) = 1.0;
     block_coeffs(1, 1) = 1.0;
     A.reset(mfem::HypreParMatrixFromBlocks(blocks, &block_coeffs));
+    idx1.SetSize(op.Width());
+    idx2.SetSize(op.Width());
+    for (int i = 0; i < op.Width(); i++)
+    {
+      idx1[i] = i;
+      idx2[i] = i + op.Width();
+    }
     /**/
     if (PtAPr)
     {
@@ -129,13 +136,7 @@ void MfemWrapperSolver<ComplexOperator>::Mult(const ComplexVector &x,
   }
   else
   {
-    // Clean this up? Is there a better way than idx1, idx2 + SetSubVector?
-    mfem::Array<int> idx1(x.Size()), idx2(x.Size());
-    for (int i = 0; i < x.Size(); i++) //move to SetOperator if really needed
-    {
-      idx1[i] = i;
-      idx2[i] = i + x.Size();
-    }
+    // Is there a better way than idx1, idx2 + SetSubVector?
     Vector X(2 * x.Size()), Y(2 * y.Size()), yr, yi;
     X.UseDevice(true);
     Y.UseDevice(true);
diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp
index 3dcc1096a..bb19411f3 100644
--- a/palace/linalg/solver.hpp
+++ b/palace/linalg/solver.hpp
@@ -83,6 +83,9 @@ class MfemWrapperSolver : public Solver<OperType>
   // mfem::Solver::SetOperator (some solvers copy their input).
   bool save_assembled;
 
+  // Indices of real and imaginary parts of the system RHS/solution.
+  mfem::Array<int> idx1, idx2;
+
 public:
   MfemWrapperSolver(std::unique_ptr<mfem::Solver> &&pc, bool save_assembled = true)
     : Solver<OperType>(pc->iterative_mode), pc(std::move(pc)),

From e2d6e46d3284dc1e3679beceef420d899f32e49b Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 26 Nov 2024 10:52:50 -0800
Subject: [PATCH 18/49] Constrain Floquet wave vector components

---
 palace/models/periodicboundaryoperator.cpp | 25 ++++++++++++++++++++--
 palace/utils/configfile.hpp                |  2 +-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index 3e24ea2cd..c1f05cca0 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -28,7 +28,10 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   }
   const auto &data = iodata.boundaries.floquet;
   MFEM_VERIFY(data.wave_vector.size() == mesh.SpaceDimension(),
-              "Bloch wave vector size must equal the spatial dimension.");
+              "Floquet/Bloch wave vector size must equal the spatial dimension.");
+  MFEM_VERIFY(mesh.SpaceDimension() == 3,
+              "Quasi-periodic Floquet periodic boundary conditions are only available "
+              " in 3D!");
   wave_vector.SetSize(data.wave_vector.size());
   std::copy(data.wave_vector.begin(), data.wave_vector.end(), wave_vector.GetData());
   non_zero_wave_vector = (wave_vector.Norml2() > std::numeric_limits<double>::epsilon());
@@ -38,11 +41,29 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
               "Quasi-periodic Floquet boundary conditions are only available for "
               " frequency domain driven or eigenmode simulations!");
 
+  // Get mesh dimensions in x/y/z coordinates
+  mfem::Vector bbmin, bbmax;
+  mesh::GetAxisAlignedBoundingBox(mesh, bbmin, bbmax);
+  bbmax -= bbmin;
+
+  // Ensure Floquet wave vector components are in range [-π/L, π/L]
+  for (int i = 0; i < mesh.SpaceDimension(); i++)
+  {
+    if (wave_vector[i] > M_PI / bbmax[i])
+    {
+      wave_vector[i] = - M_PI / bbmax[i] + fmod(wave_vector[i] + M_PI / bbmax[i], 2 * M_PI / bbmax[i]);
+    }
+    else if (wave_vector[i] < M_PI / bbmax[i])
+    {
+      wave_vector[i] = M_PI / bbmax[i] + fmod(wave_vector[i] - M_PI / bbmax[i], 2 * M_PI / bbmax[i]);
+    }
+  }
+
   // Matrix representation of cross product with wave vector
   // [k x] = | 0  -k3  k2|
   //         | k3  0  -k1|
   //         |-k2  k1  0 |
-  wave_vector_cross.SetSize(3); // assumes 3D
+  wave_vector_cross.SetSize(3);
   wave_vector_cross = 0.0;
   wave_vector_cross(0,1) = -wave_vector[2];
   wave_vector_cross(0,2) = wave_vector[1];
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index f3dcdb670..371304e32 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -489,7 +489,7 @@ struct PeriodicBoundaryData : public internal::DataVector<PeriodicData>
 struct FloquetData
 {
   public:
-    // Bloch wavevector specifying the phase delay in the X/Y/Z directions.
+    // Floquet/Bloch wavevector specifying the phase delay in the X/Y/Z directions.
     std::array<double, 3> wave_vector = {0.0, 0.0, 0.0};
 
     void SetUp(json &boundaries);

From 49b29e4fb7057b926d043ab535505423142f10af Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 26 Nov 2024 14:50:27 -0800
Subject: [PATCH 19/49] Update comments since quadrature data is no longer
 symmetric

---
 palace/fem/qfunctions/apply_qf.h     | 4 ++--
 palace/fem/qfunctions/h1_qf.h        | 2 +-
 palace/fem/qfunctions/hcurl_qf.h     | 2 +-
 palace/fem/qfunctions/hcurlhdiv_qf.h | 3 +--
 palace/fem/qfunctions/hcurlmass_qf.h | 2 +-
 palace/fem/qfunctions/hdiv_qf.h      | 2 +-
 palace/fem/qfunctions/hdivmass_qf.h  | 2 +-
 palace/fem/qfunctions/l2_qf.h        | 2 +-
 palace/fem/qfunctions/l2mass_qf.h    | 2 +-
 9 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/palace/fem/qfunctions/apply_qf.h b/palace/fem/qfunctions/apply_qf.h
index ccf8176c4..a080f7922 100644
--- a/palace/fem/qfunctions/apply_qf.h
+++ b/palace/fem/qfunctions/apply_qf.h
@@ -5,12 +5,12 @@
 #define PALACE_LIBCEED_APPLY_QF_H
 
 // libCEED QFunctions for application of a generic operator with assembled quadrature data.
-// in[0] is (symmetric) quadrature data, shape [ncomp=vdim*(vdim+1)/2, Q]
+// in[0] is quadrature data, shape [ncomp=vdim*vdim, Q]
 // in[1] is active vector, shape [ncomp=vdim, Q]
 // out[0] is active vector, shape [ncomp=vdim, Q]
 
 // For pairwise apply functions, the inputs and outputs come in pairs and the quadrature
-// data is arranged to be applied with the first vdim*(vdim+1)/2 components for the first
+// data is arranged to be applied with the first vdim*vdim components for the first
 // input/output and the remainder for the second.
 
 #include "apply/apply_12_qf.h"
diff --git a/palace/fem/qfunctions/h1_qf.h b/palace/fem/qfunctions/h1_qf.h
index 8de7cf6b2..820f4d97e 100644
--- a/palace/fem/qfunctions/h1_qf.h
+++ b/palace/fem/qfunctions/h1_qf.h
@@ -9,7 +9,7 @@
 // in[1] is active vector, shape [ncomp=vdim, Q]
 // out[0] is active vector, shape [ncomp=vdim, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix.
+// Build functions assemble the quadrature point data.
 
 #include "1/h1_1_qf.h"
 #include "1/h1_build_1_qf.h"
diff --git a/palace/fem/qfunctions/hcurl_qf.h b/palace/fem/qfunctions/hcurl_qf.h
index b01ea1e13..e39c8901f 100644
--- a/palace/fem/qfunctions/hcurl_qf.h
+++ b/palace/fem/qfunctions/hcurl_qf.h
@@ -9,7 +9,7 @@
 // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix.
+// Build functions assemble the quadrature point data.
 
 #include "21/hcurl_21_qf.h"
 #include "21/hcurl_build_21_qf.h"
diff --git a/palace/fem/qfunctions/hcurlhdiv_qf.h b/palace/fem/qfunctions/hcurlhdiv_qf.h
index 4658a456f..1cd2b3ab8 100644
--- a/palace/fem/qfunctions/hcurlhdiv_qf.h
+++ b/palace/fem/qfunctions/hcurlhdiv_qf.h
@@ -11,8 +11,7 @@
 // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix where
-// possible.
+// Build functions assemble the quadrature point data.
 
 #include "21/hcurlhdiv_21_qf.h"
 #include "21/hcurlhdiv_build_21_qf.h"
diff --git a/palace/fem/qfunctions/hcurlmass_qf.h b/palace/fem/qfunctions/hcurlmass_qf.h
index a28eebe7c..9c740a086 100644
--- a/palace/fem/qfunctions/hcurlmass_qf.h
+++ b/palace/fem/qfunctions/hcurlmass_qf.h
@@ -12,7 +12,7 @@
 // out[0] is active vector, shape [ncomp=1, Q]
 // out[1] is active vector gradient, shape [qcomp=dim, ncomp=1, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix.
+// Build functions assemble the quadrature point data.
 
 #include "21/hcurlmass_21_qf.h"
 #include "21/hcurlmass_build_21_qf.h"
diff --git a/palace/fem/qfunctions/hdiv_qf.h b/palace/fem/qfunctions/hdiv_qf.h
index cbce03869..8da28702b 100644
--- a/palace/fem/qfunctions/hdiv_qf.h
+++ b/palace/fem/qfunctions/hdiv_qf.h
@@ -10,7 +10,7 @@
 // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix.
+// Build functions assemble the quadrature point data.
 
 #include "21/hdiv_21_qf.h"
 #include "21/hdiv_build_21_qf.h"
diff --git a/palace/fem/qfunctions/hdivmass_qf.h b/palace/fem/qfunctions/hdivmass_qf.h
index 617cd2703..29809e14d 100644
--- a/palace/fem/qfunctions/hdivmass_qf.h
+++ b/palace/fem/qfunctions/hdivmass_qf.h
@@ -22,7 +22,7 @@
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[1] is active vector curl, shape [ncomp=1, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix.
+// Build functions assemble the quadrature point data.
 
 #include "22/hdivmass_22_qf.h"
 #include "22/hdivmass_build_22_qf.h"
diff --git a/palace/fem/qfunctions/l2_qf.h b/palace/fem/qfunctions/l2_qf.h
index 6c8072a43..d931022f6 100644
--- a/palace/fem/qfunctions/l2_qf.h
+++ b/palace/fem/qfunctions/l2_qf.h
@@ -10,7 +10,7 @@
 // in[2] is active vector, shape [ncomp=vdim, Q]
 // out[0] is active vector, shape [ncomp=vdim, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix.
+// Build functions assemble the quadrature point data.
 
 #include "1/l2_1_qf.h"
 #include "1/l2_build_1_qf.h"
diff --git a/palace/fem/qfunctions/l2mass_qf.h b/palace/fem/qfunctions/l2mass_qf.h
index d37672c0f..709b507ab 100644
--- a/palace/fem/qfunctions/l2mass_qf.h
+++ b/palace/fem/qfunctions/l2mass_qf.h
@@ -14,7 +14,7 @@
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[1] is active vector divergence, shape [ncomp=1, Q]
 
-// Build functions assemble the quadrature point data, stored as a symmetric matrix.
+// Build functions assemble the quadrature point data.
 
 #include "21/l2mass_21_qf.h"
 #include "21/l2mass_build_21_qf.h"

From e8b0d959eb22dd867bcd36ce7631b7681f18aa6d Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 2 Dec 2024 14:12:13 -0800
Subject: [PATCH 20/49] Add floquet periodic mass term to auxiliary operator

---
 palace/fem/bilinearform.cpp     | 20 ++++++++++++++++---
 palace/fem/libceed/operator.cpp | 13 ++++++++----
 palace/fem/libceed/operator.hpp |  3 +++
 palace/linalg/solver.cpp        |  1 -
 palace/models/spaceoperator.cpp | 35 ++++++++++++++++++++++++---------
 palace/utils/geodata.cpp        | 18 ++++++++++++-----
 6 files changed, 68 insertions(+), 22 deletions(-)

diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index 7fca63cf3..3283d6248 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -36,7 +36,9 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
   std::unique_ptr<ceed::Operator> op;
   if (&trial_fespace == &test_fespace)
   {
-    op = std::make_unique<ceed::SymmetricOperator>(test_fespace.GetVSize(),
+    Mpi::Print("bilinearform.cpp creating non-symmetric op\n");
+    op = std::make_unique<ceed::/*Symmetric*/Operator>(test_fespace.GetVSize(),
+    //op = std::make_unique<ceed::SymmetricOperator>(test_fespace.GetVSize(),
                                                    trial_fespace.GetVSize());
   }
   else
@@ -75,7 +77,13 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           integ->SetMapTypes(trial_map_type, test_map_type);
           integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
                           data.geom_data, data.geom_data_restr, &sub_op);
-          op->AddOper(sub_op);  // Sub-operator owned by ceed::Operator
+          // Transpose operator.
+          CeedOperator sub_op_t;
+          integ->SetMapTypes(test_map_type, trial_map_type);
+          integ->Assemble(ceed, test_restr, trial_restr, test_basis, trial_basis,
+                          data.geom_data, data.geom_data_restr, &sub_op_t);
+          op->AddOper(sub_op, sub_op_t);  // Sub-operator owned by ceed::Operator
+          //op->AddOper(sub_op);  // Sub-operator owned by ceed::Operator
         }
       }
       else if (mfem::Geometry::Dimension[geom] == mesh.Dimension() - 1 &&
@@ -95,7 +103,13 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           integ->SetMapTypes(trial_map_type, test_map_type);
           integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
                           data.geom_data, data.geom_data_restr, &sub_op);
-          op->AddOper(sub_op);  // Sub-operator owned by ceed::Operator
+          // Transpose operator.
+          CeedOperator sub_op_t;
+          integ->SetMapTypes(test_map_type, trial_map_type);
+          integ->Assemble(ceed, test_restr, trial_restr, test_basis, trial_basis,
+                         data.geom_data, data.geom_data_restr, &sub_op_t);
+          op->AddOper(sub_op, sub_op_t);  // Sub-operator owned by ceed::Operator
+          //op->AddOper(sub_op);  // Sub-operator owned by ceed::Operator
         }
       }
     }
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index fb5fc91ca..b0726fdae 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -530,7 +530,9 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
   };
 
   // Initialize the coarse operator.
-  auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
+  Mpi::Print("ceed operator.cpp L533 Creating non-symmetric op_coarse\n");
+  auto op_coarse = std::make_unique</*Symmetric*/Operator>(fespace_coarse.GetVSize(),
+  //auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
                                               fespace_coarse.GetVSize());
 
   // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
@@ -551,14 +553,17 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
       }
     }
     CeedInt nsub_ops_fine;
-    CeedOperator *sub_ops_fine;
+    CeedOperator *sub_ops_fine, *sub_ops_fine_t;
     PalaceCeedCall(ceed, CeedCompositeOperatorGetNumSub(op_fine[id], &nsub_ops_fine));
     PalaceCeedCall(ceed, CeedCompositeOperatorGetSubList(op_fine[id], &sub_ops_fine));
+    PalaceCeedCall(ceed, CeedCompositeOperatorGetSubList(op_fine.GetTranspose(id), &sub_ops_fine_t));
     for (CeedInt k = 0; k < nsub_ops_fine; k++)
     {
-      CeedOperator sub_op_coarse;
+      CeedOperator sub_op_coarse, sub_op_coarse_t;
       SingleOperatorCoarsen(ceed, sub_ops_fine[k], &sub_op_coarse);
-      op_coarse->AddOper(sub_op_coarse);  // Sub-operator owned by ceed::Operator
+      SingleOperatorCoarsen(ceed, sub_ops_fine_t[k], &sub_op_coarse_t);
+      op_coarse->AddOper(sub_op_coarse, sub_op_coarse_t);  // Sub-operator owned by ceed::Operator
+      //op_coarse->AddOper(sub_op_coarse);  // Sub-operator owned by ceed::Operator
     }
   }
 
diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp
index 72848fcea..2da52edd8 100644
--- a/palace/fem/libceed/operator.hpp
+++ b/palace/fem/libceed/operator.hpp
@@ -42,6 +42,9 @@ class Operator : public palace::Operator
   ~Operator() override;
 
   CeedOperator operator[](std::size_t i) const { return op[i]; }
+
+  CeedOperator GetTranspose(std::size_t i) const { return op_t[i]; }
+
   auto Size() const { return op.size(); }
 
   void AddOper(CeedOperator sub_op, CeedOperator sub_op_t = nullptr);
diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index 06c74322c..a4f9acfb1 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -11,7 +11,6 @@ namespace palace
 template <>
 void MfemWrapperSolver<Operator>::SetOperator(const Operator &op)
 {
-  Mpi::Print("In MfemWrapperSolver SetOperator with Real operator\n");
   // Operator is always assembled as a HypreParMatrix.
   if (const auto *hA = dynamic_cast<const mfem::HypreParMatrix *>(&op))
   {
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index f0cbb77ae..2cf88d384 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -261,7 +261,10 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
 }
 
 void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
-                       const MaterialPropertyCoefficient *fb, bool assemble_q_data = false)
+                       const MaterialPropertyCoefficient *fb,
+                       const MaterialPropertyCoefficient *fpm,
+                       const MaterialPropertyCoefficient *fpw,
+                       const MaterialPropertyCoefficient *fp, bool assemble_q_data = false)
 {
   if (f && !f->empty())
   {
@@ -271,6 +274,18 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   {
     a.AddBoundaryIntegrator<DiffusionIntegrator>(*fb);
   }
+  if (fpm && !fpm->empty())
+  {
+    a.AddDomainIntegrator<DiffusionIntegrator>(*fpm);
+  }
+  if (fpw && !fpw->empty())
+  {
+    //a.AddDomainIntegrator<DiffusionIntegrator>(*fpw);
+  }
+    if (fp && !fp->empty())
+  {
+    //a.AddDomainIntegrator<DiffusionIntegrator>(*fp);
+  }
   if (assemble_q_data)
   {
     a.AssembleQuadratureData();
@@ -309,11 +324,15 @@ auto AssembleOperators(const FiniteElementSpaceHierarchy &fespaces,
 
 auto AssembleAuxOperators(const FiniteElementSpaceHierarchy &fespaces,
                           const MaterialPropertyCoefficient *f,
-                          const MaterialPropertyCoefficient *fb, bool skip_zeros = false,
+                          const MaterialPropertyCoefficient *fb,
+                          const MaterialPropertyCoefficient *fpm,
+                          const MaterialPropertyCoefficient *fpw,
+                          const MaterialPropertyCoefficient *fp,
+                          bool skip_zeros = false,
                           bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
-  AddAuxIntegrators(a, f, fb, assemble_q_data);
+  AddAuxIntegrators(a, f, fb, fpm, fpw, fp, assemble_q_data);
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
@@ -778,9 +797,7 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&br,
 
 template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, double a1,
-                                                                 double a2, double a3)/*,
-                                                                 double a4, double a5,
-                                                                 double a6)*/
+                                                                 double a2, double a3)
 {
   // XX TODO: Handle complex coeff a0/a1/a2/a3 (like GetSystemMatrix)
 
@@ -830,14 +847,14 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
       br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros,
                                  assemble_q_data);
       br_aux_vec =
-          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
+          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros, assemble_q_data);
     }
     if (!empty[1])
     {
       bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpi, &fpwi, &fpi, skip_zeros,
                                  assemble_q_data);
       bi_aux_vec =
-          AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, skip_zeros, assemble_q_data);
+          AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fmpi, &fpwi, &fpi, &skip_zeros, assemble_q_data);
     }
   }
   else
@@ -864,7 +881,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
       br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros,
                                  assemble_q_data);
       br_aux_vec =
-          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
+          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros, assemble_q_data);
     }
   }
 
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index 4a613a749..01c340f62 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1771,6 +1771,7 @@ mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
     mesh->GetBdrElementVertices(el, vert_bdr);
     mfem::Vector bdr_elem_center(sdim), adj_elem_center(sdim);
     mfem::Vector bdr_elem_offset_p(sdim), bdr_elem_offset_n(sdim);
+    // Can maybe use mfem mesh GetElementCenter to compute centroids?
     bdr_elem_center = 0.0;
     for (int j=0; j<vert_bdr.Size(); j++)
     {
@@ -1781,7 +1782,9 @@ mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
 
     int eladj, info;
     mesh->GetBdrElementAdjacentElement(el, eladj, info);
-    mesh->GetElementVertices(eladj, vert_adj);
+    //mesh->GetElementVertices(eladj, vert_adj);
+    mesh->GetElementCenter(eladj, adj_elem_center);
+    /*
     adj_elem_center = 0.0;
     for (int j=0; j<vert_adj.Size(); j++)
     {
@@ -1789,7 +1792,7 @@ mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
       adj_elem_center += vx;
     }
     adj_elem_center /= vert_adj.Size();
-
+    */
     bdr_elem_offset_p = bdr_elem_center;
     bdr_elem_offset_p += loc_normal;
     bdr_elem_offset_n = bdr_elem_center;
@@ -1868,6 +1871,8 @@ void ComputeNormal(std::unique_ptr<mfem::Mesh> &periodic_mesh,
   int el, info;
   periodic_mesh->GetBdrElementAdjacentElement(elem, el, info);
   periodic_mesh->GetElementVertices(el, vert_adj);
+
+  // Can maybe use mfem mesh GetElementCenter to compute centroids?
   adj_elem_center = 0.0;
   for (int j=0; j<vert_adj.Size(); j++)
   {
@@ -2281,12 +2286,13 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       // Mixed mesh is trickier
       // MOVE THIS TEST SOMEWHERE ELSE. IT SHOULD ALSO APPLY TO MESHES
       // ALREADY CREATED WITH PERIODICITY!!!
+      /**/
       mfem::Array<mfem::Geometry::Type> geoms;
       periodic_mesh->GetGeometries(3, geoms);
       if (geoms.Size() == 1 && geoms[0] == mfem::Geometry::TETRAHEDRON)
       {
         // Pure tet mesh
-        MFEM_VERIFY(periodic_mesh->GetNE() > 3*num_periodic_bc_elems,
+        MFEM_VERIFY(periodic_mesh->GetNE() > 3 * num_periodic_bc_elems,
         "Not enough mesh elements in periodic direction!");
       }
       else if (geoms.Size() > 1 && has_tets)
@@ -2301,7 +2307,7 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
         MFEM_VERIFY(periodic_mesh->GetNE() > num_periodic_bc_elems,
         "Not enough mesh elements in periodic direction!");
       }
-
+      /**/
       mfem::DenseMatrix transformation(4);
       // If only translation is provided -> use it
       // If only affine transfomr is provided -> use it
@@ -2408,7 +2414,7 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
         }
 
       }
-
+      Mpi::Print("CreatePeriodicVertexMapping\n");
       auto periodic_mapping = CreatePeriodicVertexMapping(periodic_mesh,
                                                  bdr_v_donor,
                                                  bdr_v_receiver,
@@ -2420,12 +2426,14 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       //auto periodic_mapping =
       //    periodic_mesh->CreatePeriodicVertexMapping({translation2}, 1E-6);
       //periodic_mesh->
+      Mpi::Print("MFEM MakePeriodic\n");
       auto p_mesh = std::make_unique<mfem::Mesh>(
           mfem::Mesh::MakePeriodic(*periodic_mesh, periodic_mapping));
       periodic_mesh = std::move(p_mesh);
     }
     mesh = std::move(periodic_mesh);
   }
+  Mpi::Print("Return mesh\n");
   return mesh;
 }
 

From 951d70fcaa925cb994302443dfc88fdc3323b6ee Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 2 Dec 2024 16:29:30 -0800
Subject: [PATCH 21/49] Fix formatting

---
 palace/drivers/eigensolver.cpp             |   4 +-
 palace/fem/bilinearform.cpp                |  13 +-
 palace/fem/libceed/operator.cpp            |  15 +-
 palace/fem/qfunctions/22/utils_22_qf.h     |   2 +-
 palace/linalg/arpack.cpp                   |   3 +-
 palace/linalg/slepc.cpp                    |   3 +-
 palace/linalg/solver.cpp                   |   2 +-
 palace/models/periodicboundaryoperator.cpp |  28 +-
 palace/models/spaceoperator.cpp            | 132 +++----
 palace/models/spaceoperator.hpp            |   9 +-
 palace/utils/configfile.cpp                |  12 +-
 palace/utils/configfile.hpp                |  13 +-
 palace/utils/geodata.cpp                   | 438 +++++++--------------
 13 files changed, 272 insertions(+), 402 deletions(-)

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index f7f304466..86accd2d2 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -259,8 +259,8 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // preconditioner for complex linear systems is constructed from a real approximation
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * target,
-                                    std::complex<double>(-target * target, 0.0),
-                                    K.get(), C.get(), M.get(), A2.get(), PF.get());
+                                    std::complex<double>(-target * target, 0.0), K.get(),
+                                    C.get(), M.get(), A2.get(), PF.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, target, -target * target,
                                                              target);
   auto ksp = std::make_unique<ComplexKspSolver>(iodata, space_op.GetNDSpaces(),
diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index 2955e1250..0f8f0708b 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -37,9 +37,10 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
   if (&trial_fespace == &test_fespace)
   {
     Mpi::Print("bilinearform.cpp creating non-symmetric op\n");
-    op = std::make_unique<ceed::/*Symmetric*/Operator>(test_fespace.GetVSize(),
-    //op = std::make_unique<ceed::SymmetricOperator>(test_fespace.GetVSize(),
-                                                   trial_fespace.GetVSize());
+    op = std::make_unique<ceed::/*Symmetric*/ Operator>(
+        test_fespace.GetVSize(),
+        // op = std::make_unique<ceed::SymmetricOperator>(test_fespace.GetVSize(),
+        trial_fespace.GetVSize());
   }
   else
   {
@@ -83,7 +84,7 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           integ->Assemble(ceed, test_restr, trial_restr, test_basis, trial_basis,
                           data.geom_data, data.geom_data_restr, &sub_op_t);
           op->AddSubOperator(sub_op, sub_op_t);  // Sub-operator owned by ceed::Operator
-          //op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
+          // op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
         }
       }
       else if (mfem::Geometry::Dimension[geom] == mesh.Dimension() - 1 &&
@@ -108,9 +109,9 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           CeedOperator sub_op_t;
           integ->SetMapTypes(test_map_type, trial_map_type);
           integ->Assemble(ceed, test_restr, trial_restr, test_basis, trial_basis,
-                         data.geom_data, data.geom_data_restr, &sub_op_t);
+                          data.geom_data, data.geom_data_restr, &sub_op_t);
           op->AddSubOperator(sub_op, sub_op_t);  // Sub-operator owned by ceed::Operator
-          //op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
+          // op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
         }
       }
     }
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index 6b7aab625..d5688aeab 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -547,9 +547,10 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
 
   // Initialize the coarse operator.
   Mpi::Print("ceed operator.cpp L533 Creating non-symmetric op_coarse\n");
-  auto op_coarse = std::make_unique</*Symmetric*/Operator>(fespace_coarse.GetVSize(),
-  //auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
-                                              fespace_coarse.GetVSize());
+  auto op_coarse = std::make_unique</*Symmetric*/ Operator>(
+      fespace_coarse.GetVSize(),
+      // auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
+      fespace_coarse.GetVSize());
 
   // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
   // types, integrators) of the original fine operator.
@@ -572,14 +573,16 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
     CeedOperator *sub_ops_fine, *sub_ops_fine_t;
     PalaceCeedCall(ceed, CeedCompositeOperatorGetNumSub(op_fine[id], &nsub_ops_fine));
     PalaceCeedCall(ceed, CeedCompositeOperatorGetSubList(op_fine[id], &sub_ops_fine));
-    PalaceCeedCall(ceed, CeedCompositeOperatorGetSubList(op_fine.GetTranspose(id), &sub_ops_fine_t));
+    PalaceCeedCall(
+        ceed, CeedCompositeOperatorGetSubList(op_fine.GetTranspose(id), &sub_ops_fine_t));
     for (CeedInt k = 0; k < nsub_ops_fine; k++)
     {
       CeedOperator sub_op_coarse, sub_op_coarse_t;
       SingleOperatorCoarsen(ceed, sub_ops_fine[k], &sub_op_coarse);
       SingleOperatorCoarsen(ceed, sub_ops_fine_t[k], &sub_op_coarse_t);
-      op_coarse->AddSubOperator(sub_op_coarse, sub_op_coarse_t);  // Sub-operator owned by ceed::Operator
-      //op_coarse->AddSubOperator(sub_op_coarse);  // Sub-operator owned by ceed::Operator
+      op_coarse->AddSubOperator(sub_op_coarse,
+                                sub_op_coarse_t);  // Sub-operator owned by ceed::Operator
+      // op_coarse->AddSubOperator(sub_op_coarse);  // Sub-operator owned by ceed::Operator
     }
   }
 
diff --git a/palace/fem/qfunctions/22/utils_22_qf.h b/palace/fem/qfunctions/22/utils_22_qf.h
index d3c4d7306..a377c3531 100644
--- a/palace/fem/qfunctions/22/utils_22_qf.h
+++ b/palace/fem/qfunctions/22/utils_22_qf.h
@@ -77,7 +77,7 @@ CEED_QFUNCTION_HELPER void MultBAx22(const CeedScalar A[4], const CeedScalar B[4
 }
 
 CEED_QFUNCTION_HELPER void MultAtBA22(const CeedScalar A[4], const CeedScalar B[4],
-                                      CeedScalar C[/*3*/4])
+                                      CeedScalar C[4])
 {
   // A: 0 2   B: 0 2   C: 0 2
   //    1 3      1 3      1 3
diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp
index 260f08f9a..68f31372e 100644
--- a/palace/linalg/arpack.cpp
+++ b/palace/linalg/arpack.cpp
@@ -533,7 +533,8 @@ void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
 }
 
 void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                   const ComplexOperator &P, EigenvalueSolver::ScaleType type)
+                                   const ComplexOperator &P,
+                                   EigenvalueSolver::ScaleType type)
 {
   MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
   bool first = (opK == nullptr);
diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp
index f5eed679f..a3975a1a8 100644
--- a/palace/linalg/slepc.cpp
+++ b/palace/linalg/slepc.cpp
@@ -806,7 +806,8 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato
 }
 
 void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
-                                  const ComplexOperator &P, EigenvalueSolver::ScaleType type)
+                                  const ComplexOperator &P,
+                                  EigenvalueSolver::ScaleType type)
 {
   // Construct shell matrices for the scaled operators which define the generalized
   // eigenvalue problem.
diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index a4f9acfb1..780bffd34 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -52,7 +52,7 @@ void MfemWrapperSolver<ComplexOperator>::SetOperator(const ComplexOperator &op)
   }
   if (hAr && hAi)
   {
-    //A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
+    // A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
     /**/
     // A = [Ar, -Ai]
     //     [Ai,  Ar]
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index c1f05cca0..600e6a0ce 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -36,8 +36,8 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   std::copy(data.wave_vector.begin(), data.wave_vector.end(), wave_vector.GetData());
   non_zero_wave_vector = (wave_vector.Norml2() > std::numeric_limits<double>::epsilon());
   MFEM_VERIFY(!non_zero_wave_vector ||
-              iodata.problem.type == config::ProblemData::Type::DRIVEN ||
-              iodata.problem.type == config::ProblemData::Type::EIGENMODE,
+                  iodata.problem.type == config::ProblemData::Type::DRIVEN ||
+                  iodata.problem.type == config::ProblemData::Type::EIGENMODE,
               "Quasi-periodic Floquet boundary conditions are only available for "
               " frequency domain driven or eigenmode simulations!");
 
@@ -51,11 +51,13 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   {
     if (wave_vector[i] > M_PI / bbmax[i])
     {
-      wave_vector[i] = - M_PI / bbmax[i] + fmod(wave_vector[i] + M_PI / bbmax[i], 2 * M_PI / bbmax[i]);
+      wave_vector[i] =
+          -M_PI / bbmax[i] + fmod(wave_vector[i] + M_PI / bbmax[i], 2 * M_PI / bbmax[i]);
     }
     else if (wave_vector[i] < M_PI / bbmax[i])
     {
-      wave_vector[i] = M_PI / bbmax[i] + fmod(wave_vector[i] - M_PI / bbmax[i], 2 * M_PI / bbmax[i]);
+      wave_vector[i] =
+          M_PI / bbmax[i] + fmod(wave_vector[i] - M_PI / bbmax[i], 2 * M_PI / bbmax[i]);
     }
   }
 
@@ -65,12 +67,12 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   //         |-k2  k1  0 |
   wave_vector_cross.SetSize(3);
   wave_vector_cross = 0.0;
-  wave_vector_cross(0,1) = -wave_vector[2];
-  wave_vector_cross(0,2) = wave_vector[1];
-  wave_vector_cross(1,0) = wave_vector[2];
-  wave_vector_cross(1,2) = -wave_vector[0];
-  wave_vector_cross(2,0) = -wave_vector[1];
-  wave_vector_cross(2,1) = wave_vector[0];
+  wave_vector_cross(0, 1) = -wave_vector[2];
+  wave_vector_cross(0, 2) = wave_vector[1];
+  wave_vector_cross(1, 0) = wave_vector[2];
+  wave_vector_cross(1, 2) = -wave_vector[0];
+  wave_vector_cross(2, 0) = -wave_vector[1];
+  wave_vector_cross(2, 1) = wave_vector[0];
 }
 
 mfem::Array<int>
@@ -110,8 +112,7 @@ PeriodicBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
     if (!bdr_warn_list.empty())
     {
       Mpi::Print("\n");
-      Mpi::Warning(
-          "Unknown periodic boundary attributes!\nSolver will just ignore them!");
+      Mpi::Warning("Unknown periodic boundary attributes!\nSolver will just ignore them!");
       utils::PrettyPrint(bdr_warn_list, "Boundary attribute list:");
       Mpi::Print("\n");
     }
@@ -160,7 +161,8 @@ void PeriodicBoundaryOperator::AddRealMassCoefficients(double coeff,
     }
     mfem::DenseTensor kxTmuinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
     kxTmuinvkx = linalg::Mult(kxT, kxTmuinvkx);
-    MaterialPropertyCoefficient kxTmuinvkx_func(mat_op.GetAttributeToMaterial(), kxTmuinvkx);
+    MaterialPropertyCoefficient kxTmuinvkx_func(mat_op.GetAttributeToMaterial(),
+                                                kxTmuinvkx);
     f.AddCoefficient(kxTmuinvkx_func.GetAttributeToMaterial(),
                      kxTmuinvkx_func.GetMaterialProperties(), coeff);
   }
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 2cf88d384..fa5f8ce6f 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -45,9 +45,8 @@ SpaceOperator::SpaceOperator(const IoData &iodata,
         iodata.solver.linear.estimator_mg ? iodata.solver.linear.mg_max_levels : 1, mesh,
         rt_fecs)),
     mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()),
-    periodic_op(iodata, mat_op, *mesh.back()),
-    surf_sigma_op(iodata, mat_op, *mesh.back()), surf_z_op(iodata, mat_op, *mesh.back()),
-    lumped_port_op(iodata, mat_op, *mesh.back()),
+    periodic_op(iodata, mat_op, *mesh.back()), surf_sigma_op(iodata, mat_op, *mesh.back()),
+    surf_z_op(iodata, mat_op, *mesh.back()), lumped_port_op(iodata, mat_op, *mesh.back()),
     wave_port_op(iodata, mat_op, GetNDSpace(), GetH1Space()),
     surf_j_op(iodata, *mesh.back())
 {
@@ -135,9 +134,9 @@ void SpaceOperator::CheckBoundaryProperties()
   for (int i = 0; i < dbc_marker.Size(); i++)
   {
     aux_bdr_marker[i] =
-        (dbc_marker[i] || farfield_marker[i] || periodic_marker[i] || surf_sigma_marker[i] ||
-         surf_z_Rs_marker[i] || surf_z_Ls_marker[i] || lumped_port_Rs_marker[i] ||
-         lumped_port_Ls_marker[i] || wave_port_marker[i]);
+        (dbc_marker[i] || farfield_marker[i] || periodic_marker[i] ||
+         surf_sigma_marker[i] || surf_z_Rs_marker[i] || surf_z_Ls_marker[i] ||
+         lumped_port_Rs_marker[i] || lumped_port_Ls_marker[i] || wave_port_marker[i]);
     if (aux_bdr_marker[i])
     {
       aux_bdr_attr.Append(i + 1);
@@ -159,9 +158,9 @@ void SpaceOperator::CheckBoundaryProperties()
   const auto surf_j_marker = mesh::AttrToMarker(bdr_attr_max, surf_j_op.GetAttrList());
   for (int i = 0; i < dbc_marker.Size(); i++)
   {
-    MFEM_VERIFY(dbc_marker[i] + farfield_marker[i] + periodic_marker[i] + surf_sigma_marker[i] +
-                        surf_z_marker[i] + lumped_port_marker[i] + wave_port_marker[i] +
-                        surf_j_marker[i] <=
+    MFEM_VERIFY(dbc_marker[i] + farfield_marker[i] + periodic_marker[i] +
+                        surf_sigma_marker[i] + surf_z_marker[i] + lumped_port_marker[i] +
+                        wave_port_marker[i] + surf_j_marker[i] <=
                     1,
                 "Boundary attributes should not be specified with multiple BC!");
   }
@@ -280,11 +279,11 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   }
   if (fpw && !fpw->empty())
   {
-    //a.AddDomainIntegrator<DiffusionIntegrator>(*fpw);
+    // a.AddDomainIntegrator<DiffusionIntegrator>(*fpw);
   }
-    if (fp && !fp->empty())
+  if (fp && !fp->empty())
   {
-    //a.AddDomainIntegrator<DiffusionIntegrator>(*fp);
+    // a.AddDomainIntegrator<DiffusionIntegrator>(*fp);
   }
   if (assemble_q_data)
   {
@@ -292,30 +291,24 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   }
 }
 
-auto AssembleOperator(const FiniteElementSpace &fespace,
-                      const MaterialPropertyCoefficient *df,
-                      const MaterialPropertyCoefficient *f,
-                      const MaterialPropertyCoefficient *dfb,
-                      const MaterialPropertyCoefficient *fb,
-                      const MaterialPropertyCoefficient *fpm,
-                      const MaterialPropertyCoefficient *fpw,
-                      const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
-                      bool assemble_q_data = false)
+auto AssembleOperator(
+    const FiniteElementSpace &fespace, const MaterialPropertyCoefficient *df,
+    const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
+    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpm,
+    const MaterialPropertyCoefficient *fpw, const MaterialPropertyCoefficient *fp,
+    bool skip_zeros = false, bool assemble_q_data = false)
 {
   BilinearForm a(fespace);
   AddIntegrators(a, df, f, dfb, fb, fpm, fpw, fp, assemble_q_data);
   return a.Assemble(skip_zeros);
 }
 
-auto AssembleOperators(const FiniteElementSpaceHierarchy &fespaces,
-                       const MaterialPropertyCoefficient *df,
-                       const MaterialPropertyCoefficient *f,
-                       const MaterialPropertyCoefficient *dfb,
-                       const MaterialPropertyCoefficient *fb,
-                       const MaterialPropertyCoefficient *fpm,
-                       const MaterialPropertyCoefficient *fpw,
-                       const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
-                       bool assemble_q_data = false, std::size_t l0 = 0)
+auto AssembleOperators(
+    const FiniteElementSpaceHierarchy &fespaces, const MaterialPropertyCoefficient *df,
+    const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
+    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpm,
+    const MaterialPropertyCoefficient *fpw, const MaterialPropertyCoefficient *fp,
+    bool skip_zeros = false, bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
   AddIntegrators(a, df, f, dfb, fb, fpm, fpw, fp, assemble_q_data);
@@ -327,8 +320,7 @@ auto AssembleAuxOperators(const FiniteElementSpaceHierarchy &fespaces,
                           const MaterialPropertyCoefficient *fb,
                           const MaterialPropertyCoefficient *fpm,
                           const MaterialPropertyCoefficient *fpw,
-                          const MaterialPropertyCoefficient *fp,
-                          bool skip_zeros = false,
+                          const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
                           bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
@@ -354,7 +346,8 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr, nullptr, skip_zeros);
+  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr, nullptr,
+                            skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto K = std::make_unique<ComplexParOperator>(std::move(k), nullptr, GetNDSpace());
@@ -385,7 +378,8 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr, nullptr, nullptr, skip_zeros);
+  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr, nullptr,
+                            nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto C = std::make_unique<ComplexParOperator>(std::move(c), nullptr, GetNDSpace());
@@ -422,11 +416,13 @@ std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy
   std::unique_ptr<Operator> mr, mi;
   if (!empty[0])
   {
-    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr, nullptr, nullptr, skip_zeros);
+    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr, nullptr,
+                          nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr, nullptr, nullptr, skip_zeros);
+    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr, nullptr,
+                          nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -462,11 +458,13 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   std::unique_ptr<Operator> ar, ai;
   if (!empty[0])
   {
-    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr, nullptr, nullptr, skip_zeros);
+    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr, nullptr,
+                          nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr, nullptr, nullptr, skip_zeros);
+    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr, nullptr,
+                          nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -504,11 +502,13 @@ SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy)
   std::unique_ptr<Operator> pr, pi;
   if (!empty[0])
   {
-    pr = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpm, nullptr, nullptr, skip_zeros);
+    pr = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpm, nullptr,
+                          nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    pi = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fpwc, &fpc, skip_zeros);
+    pi = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fpwc,
+                          &fpc, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -532,7 +532,7 @@ namespace
 auto BuildParSumOperator(int h, int w, double a0, double a1, double a2,
                          const ParOperator *K, const ParOperator *C, const ParOperator *M,
                          const ParOperator *A2, const ParOperator *P,
-                        const FiniteElementSpace &fespace)
+                         const FiniteElementSpace &fespace)
 {
   auto sum = std::make_unique<SumOperator>(h, w);
   if (K && a0 != 0.0)
@@ -562,7 +562,7 @@ auto BuildParSumOperator(int h, int w, std::complex<double> a0, std::complex<dou
                          std::complex<double> a2, const ComplexParOperator *K,
                          const ComplexParOperator *C, const ComplexParOperator *M,
                          const ComplexParOperator *A2, const ComplexParOperator *P,
-                        const FiniteElementSpace &fespace)
+                         const FiniteElementSpace &fespace)
 {
   // Block 2 x 2 equivalent-real formulation for each term in the sum:
   //                    [ sumr ]  +=  [ ar  -ai ] [ Ar ]
@@ -686,8 +686,8 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
   const auto *PtAP_M = (M) ? dynamic_cast<const ParOperType *>(M) : nullptr;
   const auto *PtAP_A2 = (A2) ? dynamic_cast<const ParOperType *>(A2) : nullptr;
   const auto *PtAP_P = (P) ? dynamic_cast<const ParOperType *>(P) : nullptr;
-  MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2)
-               && (!P || PtAP_P),
+  MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2) &&
+                  (!P || PtAP_P),
               "SpaceOperator requires ParOperator or ComplexParOperator for system matrix "
               "construction!");
 
@@ -837,24 +837,24 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     periodic_op.AddRealMassCoefficients(1.0, fmpr);
     periodic_op.AddWeakCurlCoefficients(1.0, fpwi);
     periodic_op.AddCurlCoefficients(-1.0, fpi);
-    int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()
-                     && fpwr.empty() && fpr.empty() && fmpr.empty()),
-                    (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty()
-                     && fpwi.empty() && fpi.empty() && fmpi.empty())};
+    int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
+                     fpwr.empty() && fpr.empty() && fmpr.empty()),
+                    (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty() &&
+                     fpwi.empty() && fpi.empty() && fmpi.empty())};
     Mpi::GlobalMin(2, empty, GetComm());
     if (!empty[0])
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros,
-                                 assemble_q_data);
-      br_aux_vec =
-          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros, assemble_q_data);
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr,
+                                 skip_zeros, assemble_q_data);
+      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr,
+                                        skip_zeros, assemble_q_data);
     }
     if (!empty[1])
     {
-      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpi, &fpwi, &fpi, skip_zeros,
-                                 assemble_q_data);
-      bi_aux_vec =
-          AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fmpi, &fpwi, &fpi, &skip_zeros, assemble_q_data);
+      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpi, &fpwi, &fpi,
+                                 skip_zeros, assemble_q_data);
+      bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fmpi, &fpwi, &fpi,
+                                        &skip_zeros, assemble_q_data);
     }
   }
   else
@@ -873,15 +873,15 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     periodic_op.AddRealMassCoefficients(1.0, fmpr);
     periodic_op.AddWeakCurlCoefficients(1.0, fpwr);
     periodic_op.AddCurlCoefficients(-1.0, fpr);
-    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
-                 fmpr.empty() && fpwr.empty() && fpr.empty());
+    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() && fmpr.empty() &&
+                 fpwr.empty() && fpr.empty());
     Mpi::GlobalMin(1, &empty, GetComm());
     if (!empty)
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros,
-                                 assemble_q_data);
-      br_aux_vec =
-          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr, skip_zeros, assemble_q_data);
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr,
+                                 skip_zeros, assemble_q_data);
+      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr,
+                                        skip_zeros, assemble_q_data);
     }
   }
 
@@ -1162,13 +1162,13 @@ template std::unique_ptr<ComplexOperator>
 
 template std::unique_ptr<Operator>
 SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, const Operator *,
-                                                 const Operator *, const Operator *, const Operator *,
-                                                 const Operator *);
+                                                 const Operator *, const Operator *,
+                                                 const Operator *, const Operator *);
 template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetSystemMatrix<ComplexOperator, std::complex<double>>(
     std::complex<double>, std::complex<double>, std::complex<double>,
-    const ComplexOperator *, const ComplexOperator *, const ComplexOperator *, const ComplexOperator *,
-    const ComplexOperator *);
+    const ComplexOperator *, const ComplexOperator *, const ComplexOperator *,
+    const ComplexOperator *, const ComplexOperator *);
 
 template std::unique_ptr<Operator>
 SpaceOperator::GetPreconditionerMatrix<Operator>(double, double, double, double);
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index a0f117cb9..c6b7b17cd 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -12,9 +12,9 @@
 #include "linalg/operator.hpp"
 #include "linalg/vector.hpp"
 #include "models/farfieldboundaryoperator.hpp"
-#include "models/periodicboundaryoperator.hpp"
 #include "models/lumpedportoperator.hpp"
 #include "models/materialoperator.hpp"
+#include "models/periodicboundaryoperator.hpp"
 #include "models/surfaceconductivityoperator.hpp"
 #include "models/surfacecurrentoperator.hpp"
 #include "models/surfaceimpedanceoperator.hpp"
@@ -159,9 +159,10 @@ class SpaceOperator
   // It is assumed that the inputs have been constructed using previous calls to
   // GetSystemMatrix() and the returned operator does not inherit ownership of any of them.
   template <typename OperType, typename ScalarType>
-  std::unique_ptr<OperType>
-  GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, const OperType *K,
-                  const OperType *C, const OperType *M, const OperType *A2 = nullptr, const OperType *P = nullptr);
+  std::unique_ptr<OperType> GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2,
+                                            const OperType *K, const OperType *C,
+                                            const OperType *M, const OperType *A2 = nullptr,
+                                            const OperType *P = nullptr);
 
   // Construct the real, SPD matrix for weighted L2 or H(curl) inner products:
   //                           B = a0 Kr + a2 Mr .
diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp
index 4d6edebd1..0d566137a 100644
--- a/palace/utils/configfile.cpp
+++ b/palace/utils/configfile.cpp
@@ -1054,7 +1054,6 @@ void FloquetData::SetUp(json &boundaries)
       std::cout << "FloquetWaveVector: " << wave_vector << '\n';
     }
   }
-
 }
 
 void PeriodicBoundaryData::SetUp(json &boundaries)
@@ -1077,19 +1076,20 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
     PeriodicData &data = vecdata.emplace_back();
     data.donor_attributes = it->at("DonorAttributes").get<std::vector<int>>();  // Required
     data.receiver_attributes =
-        it->at("ReceiverAttributes").get<std::vector<int>>();               // Required
+        it->at("ReceiverAttributes").get<std::vector<int>>();  // Required
     auto trslt = it->find("Translation");
-    if (trslt!= it->end())
+    if (trslt != it->end())
     {
       MFEM_VERIFY(trslt->is_array(),
                   "\"Translation\" should specify an array in the configuration file!");
       data.translation = trslt->get<std::array<double, 3>>();
     }
     auto trsfr = it->find("AffineTransformation");
-    if (trsfr!= it->end())
+    if (trsfr != it->end())
     {
-      MFEM_VERIFY(trsfr->is_array(),
-                  "\"AffineTransformation\" should specify an array in the configuration file!");
+      MFEM_VERIFY(
+          trsfr->is_array(),
+          "\"AffineTransformation\" should specify an array in the configuration file!");
       data.affine_transform = trsfr->get<std::array<double, 16>>();
     }
 
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index 371304e32..2b595dcd0 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -473,7 +473,8 @@ struct PeriodicData
   // Vector defining the direction and distance for this periodic boundary condition.
   std::array<double, 3> translation = {0.0, 0.0, 0.0};
   // Vector defining the affine transformation matrix for this periodic boundary condition.
-  std::array<double, 16> affine_transform = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+  std::array<double, 16> affine_transform = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                                             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
   // List of boundary donor attributes for this periodic boundary condition.
   std::vector<int> donor_attributes = {};
   // List of boundary receiver attributes for this periodic boundary condition.
@@ -488,11 +489,11 @@ struct PeriodicBoundaryData : public internal::DataVector<PeriodicData>
 
 struct FloquetData
 {
-  public:
-    // Floquet/Bloch wavevector specifying the phase delay in the X/Y/Z directions.
-    std::array<double, 3> wave_vector = {0.0, 0.0, 0.0};
+public:
+  // Floquet/Bloch wavevector specifying the phase delay in the X/Y/Z directions.
+  std::array<double, 3> wave_vector = {0.0, 0.0, 0.0};
 
-    void SetUp(json &boundaries);
+  void SetUp(json &boundaries);
 };
 
 struct WavePortData
@@ -650,7 +651,7 @@ struct BoundaryData
   WavePortBoundaryData waveport = {};
   SurfaceCurrentBoundaryData current = {};
   PeriodicBoundaryData periodic = {};
-  FloquetData floquet;//?
+  FloquetData floquet;  //?
   BoundaryPostData postpro = {};
 
   void SetUp(json &config);
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index 01c340f62..33f0cc965 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1722,8 +1722,8 @@ namespace
 {
 
 void ComputeCentroid(std::unique_ptr<mfem::Mesh> &mesh,
-                     const std::unordered_set<int> &vertidxs,
-                     mfem::Vector &centroid, double &diameter)
+                     const std::unordered_set<int> &vertidxs, mfem::Vector &centroid,
+                     double &diameter)
 {
   int sdim = mesh->SpaceDimension();
   mfem::Vector xMax(sdim), xMin(sdim), xDiff(sdim);
@@ -1742,12 +1742,12 @@ void ComputeCentroid(std::unique_ptr<mfem::Mesh> &mesh,
 
   xDiff = xMax;
   xDiff -= xMin;
-  diameter = xDiff.Norml2(); // mesh diameter
+  diameter = xDiff.Norml2();  // mesh diameter
 }
 
-mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
-                            const std::unordered_set<int> &elem_set,
-                            bool inside, bool check_planar=true)
+mfem::Vector ComputeNormal(std::unique_ptr<mfem::Mesh> &mesh,
+                           const std::unordered_set<int> &elem_set, bool inside,
+                           bool check_planar = true)
 {
   int sdim = mesh->SpaceDimension();
   mfem::IsoparametricTransformation T;
@@ -1766,14 +1766,13 @@ mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
     loc_normal /= loc_normal.Norml2();
 
     // To find if the normal is pointing inside or outside the mesh
-    // We compare the boundary element position to its adjacement element
-    mfem::Array<int> vert_bdr, vert_adj;
+    // we compare the boundary element position to its adjacent element
+    mfem::Array<int> vert_bdr;
     mesh->GetBdrElementVertices(el, vert_bdr);
     mfem::Vector bdr_elem_center(sdim), adj_elem_center(sdim);
     mfem::Vector bdr_elem_offset_p(sdim), bdr_elem_offset_n(sdim);
-    // Can maybe use mfem mesh GetElementCenter to compute centroids?
     bdr_elem_center = 0.0;
-    for (int j=0; j<vert_bdr.Size(); j++)
+    for (int j = 0; j < vert_bdr.Size(); j++)
     {
       mfem::Vector coord(mesh->GetVertex(vert_bdr[j]), sdim);
       bdr_elem_center += coord;
@@ -1782,24 +1781,17 @@ mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
 
     int eladj, info;
     mesh->GetBdrElementAdjacentElement(el, eladj, info);
-    //mesh->GetElementVertices(eladj, vert_adj);
     mesh->GetElementCenter(eladj, adj_elem_center);
-    /*
-    adj_elem_center = 0.0;
-    for (int j=0; j<vert_adj.Size(); j++)
-    {
-      mfem::Vector vx(mesh->GetVertex(vert_adj[j]), sdim);
-      adj_elem_center += vx;
-    }
-    adj_elem_center /= vert_adj.Size();
-    */
+
     bdr_elem_offset_p = bdr_elem_center;
     bdr_elem_offset_p += loc_normal;
     bdr_elem_offset_n = bdr_elem_center;
     bdr_elem_offset_n -= loc_normal;
-    //Mpi::Print("dist_n: {:.3e}, dist_p: {:.3e}\n", adj_elem_center.DistanceTo(bdr_elem_offset_n), adj_elem_center.DistanceTo(bdr_elem_offset_p));
+    // Mpi::Print("dist_n: {:.3e}, dist_p: {:.3e}\n",
+    // adj_elem_center.DistanceTo(bdr_elem_offset_n),
+    // adj_elem_center.DistanceTo(bdr_elem_offset_p));
     if (inside && (adj_elem_center.DistanceTo(bdr_elem_offset_n) <
-                 adj_elem_center.DistanceTo(bdr_elem_offset_p)))
+                   adj_elem_center.DistanceTo(bdr_elem_offset_p)))
     {
       loc_normal *= -1.0;
     }
@@ -1817,7 +1809,8 @@ mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
       diff = normal;
       diff /= count;
       diff -= loc_normal;
-      MFEM_VERIFY(diff.Norml2() < 1e-6, "Periodic boundary mapping is only supported for planar boundaries.");
+      MFEM_VERIFY(diff.Norml2() < 1e-6,
+                  "Periodic boundary mapping is only supported for planar boundaries.");
     }
     normal += loc_normal;
 
@@ -1833,76 +1826,9 @@ mfem::Vector ComputeNormal2(std::unique_ptr<mfem::Mesh> &mesh,
   return normal;
 }
 
-void ComputeNormal(std::unique_ptr<mfem::Mesh> &periodic_mesh,
-                   const int elem, mfem::Vector &normal,
-                   bool inside, const double norm_tol = 1e-6)
-{
-  int sdim = periodic_mesh->SpaceDimension();
-
-  if (sdim==1) { MFEM_ABORT("Not implemented."); }
-  else if (sdim == 2) { MFEM_ABORT("Not implemented."); }
-
-  mfem::Array<int> vert_bdr, vert_adj;
-  periodic_mesh->GetBdrElementVertices(elem, vert_bdr);
-  mfem::Vector bdr_elem_center(sdim), adj_elem_center(sdim);
-  mfem::Vector bdr_elem_offset_p(sdim), bdr_elem_offset_n(sdim);
-  mfem::Vector p1(sdim), p2(sdim);
-  bdr_elem_center = 0.0;
-  normal = 0.0;
-  for (int j=0; j<vert_bdr.Size(); j++)
-  {
-    mfem::Vector coord(periodic_mesh->GetVertex(vert_bdr[j]), 3);
-    bdr_elem_center += coord;
-    if (j==0) p1 = coord;
-    if (j==1) p2 = coord;
-    if (j>1 and normal.Norml2() < norm_tol)
-    {
-      mfem::Vector v1(sdim), v2(sdim);
-      v1 = p2;
-      v1 -= p1;
-      v2 = coord;
-      v2 -= p1;
-      v1.cross3D(v2, normal);
-    }
-  }
-  bdr_elem_center /= vert_bdr.Size();
-  normal /= normal.Norml2();
-
-  int el, info;
-  periodic_mesh->GetBdrElementAdjacentElement(elem, el, info);
-  periodic_mesh->GetElementVertices(el, vert_adj);
-
-  // Can maybe use mfem mesh GetElementCenter to compute centroids?
-  adj_elem_center = 0.0;
-  for (int j=0; j<vert_adj.Size(); j++)
-  {
-    mfem::Vector vx(periodic_mesh->GetVertex(vert_adj[j]), 3);
-    adj_elem_center += vx;
-  }
-  adj_elem_center /= vert_adj.Size();
-
-  bdr_elem_offset_p = bdr_elem_center;
-  bdr_elem_offset_p += normal;
-  bdr_elem_offset_n = bdr_elem_center;
-  bdr_elem_offset_n -= normal;
-  //Mpi::Print("dist_n: {:.3e}, dist_p: {:.3e}\n", adj_elem_center.DistanceTo(bdr_elem_offset_n), adj_elem_center.DistanceTo(bdr_elem_offset_p));
-  if (inside && (adj_elem_center.DistanceTo(bdr_elem_offset_n) <
-                 adj_elem_center.DistanceTo(bdr_elem_offset_p)))
-  {
-    normal *= -1.0;
-  }
-  if (!inside && (adj_elem_center.DistanceTo(bdr_elem_offset_p) <
-                  adj_elem_center.DistanceTo(bdr_elem_offset_n)))
-  {
-    normal *= -1.0;
-  }
-}
-
 void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
-                      const std::unordered_set<int> &vertidxs,
-                      const mfem::Vector &centroid,
-                      const double &diameter,
-                      std::vector<mfem::Vector> &unique_pts,
+                      const std::unordered_set<int> &vertidxs, const mfem::Vector &centroid,
+                      const double &diameter, std::vector<mfem::Vector> &unique_pts,
                       const double &norm_tol = 1e-6)
 {
   const int sdim = mesh->SpaceDimension();
@@ -1913,15 +1839,15 @@ void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
     coord = mesh->GetVertex(v);
     double dist = coord.DistanceTo(centroid);
     // convert dist to integer to avoid floating differences
-    dist2points[std::round(dist/diameter*1e8)].insert(v);
+    dist2points[std::round(dist / diameter * 1e8)].insert(v);
   }
 
-  std::priority_queue< std::pair<int, int>  ,
-                       std::vector< std::pair<int, int> >,
-                       std::greater <std::pair<int, int> > > q;
-  int k = 10; // number of points to keep
+  std::priority_queue<std::pair<int, int>, std::vector<std::pair<int, int>>,
+                      std::greater<std::pair<int, int>>>
+      q;
+  int k = 10;  // number of points to keep
   int num_unique_dist = 0;
-  for (const auto & [ dist, pts_set ] : dist2points)
+  for (const auto &[dist, pts_set] : dist2points)
   {
     // Only consider unique distances
     if (pts_set.size() == 1)
@@ -1941,16 +1867,16 @@ void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
     }
   }
 
-  Mpi::Print("num unique dist: {:d}, q.size(): {:d}\n", num_unique_dist, q.size());
   unique_pts.push_back(centroid);
   mfem::Vector normal(sdim);
-  normal = 0.0; //
+  normal = 0.0;  //
 
   while (q.size() > 0 and normal.Norml2() < norm_tol)
   {
     coord = mesh->GetVertex(q.top().second);
-    Mpi::Print("pts: {:d}, x/y/z: {:.3e}, {:.3e}, {:.3e}, dist: {:d}\n", q.top().second, coord[0], coord[1], coord[2], q.top().first);
-    Mpi::Print("dist2points.size(): {:d}\n",dist2points[q.top().first].size());
+    // Mpi::Print("pts: {:d}, x/y/z: {:.3e}, {:.3e}, {:.3e}, dist: {:d}\n", q.top().second,
+    //            coord[0], coord[1], coord[2], q.top().first);
+    // Mpi::Print("dist2points.size(): {:d}\n", dist2points[q.top().first].size());
     q.pop();
     unique_pts.push_back(coord);
     if (unique_pts.size() == 3)
@@ -1962,7 +1888,7 @@ void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
       v2 = unique_pts[2];
       v2 -= unique_pts[0];
       v1.cross3D(v2, normal);
-      Mpi::Print("q.size: {:d}, normal.linf: {:.3e}\n", q.size(), normal.Normlinf());
+      // Mpi::Print("q.size: {:d}, normal.linf: {:.3e}\n", q.size(), normal.Normlinf());
       if (normal.Norml2() < norm_tol)
       {
         unique_pts.pop_back();
@@ -1971,68 +1897,6 @@ void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
   }
 }
 
-void ComputeTransformSVD(const std::vector<mfem::Vector> &donor_pts,
-                         const std::vector<mfem::Vector> &receiver_pts,
-                         mfem::DenseMatrix &transformation)
-{
-  Eigen::MatrixXd A(3,3), B(3,3), R(3,3);
-  Eigen::VectorXd cA(3), cB(3);
-
-  for (int i = 0; i < 3; i++)
-  {
-    A(0,i) = donor_pts[i][0] - donor_pts[0][0];
-    A(1,i) = donor_pts[i][1] - donor_pts[0][1];
-    A(2,i) = donor_pts[i][2] - donor_pts[0][2];
-    B(0,i) = receiver_pts[i][0] - receiver_pts[0][0];
-    B(1,i) = receiver_pts[i][1] - receiver_pts[0][1];
-    B(2,i) = receiver_pts[i][2] - receiver_pts[0][2];
-    cA(i) = donor_pts[0][i];
-    cB(i) = receiver_pts[0][i];
-  }
-
-  // Compute covariance matrix and its SVD
-  R = A * B.transpose();
-  Eigen::JacobiSVD<Eigen::MatrixXd> svd;
-  svd.compute(R, Eigen::ComputeFullU | Eigen::ComputeFullV);
-  Eigen::MatrixXd U = svd.matrixU();
-  Eigen::MatrixXd V = svd.matrixV();
-  // Get rotation matrix
-  R = U * V.transpose();
-
-  // Check determinant
-  double det = R.determinant();
-  if (det < 0)
-  {
-    Mpi::Print("Determinant < 1, ({:.3e}), correct R matrix\n", det);
-    svd.compute(R, Eigen::ComputeFullU | Eigen::ComputeFullV);
-    U = svd.matrixU();
-    V = svd.matrixV();
-    for (int i = 0; i < 3; i++)
-    {
-      V(i,2) *= -1.0;
-    }
-    R = V * U.transpose();
-  }
-
-  // Get translation
-  const Eigen::VectorXd t = cB - R * cA;
-
-  // Form affine transformation matrix
-  for(int i = 0; i < 3; i++)
-  {
-    for(int j = 0; j < 3; j++)
-    {
-      transformation(i,j) = R(i,j);
-    }
-  }
-  transformation(0,3) = t(0);
-  transformation(1,3) = t(1);
-  transformation(2,3) = t(2);
-  transformation(3,3) = 1.0;
-  Mpi::Print("Affine transformation using 3-pt SVD\n");
-  transformation.Print();
-}
-
 void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
                                  const std::vector<mfem::Vector> &receiver_pts,
                                  mfem::DenseMatrix &transformation)
@@ -2044,39 +1908,32 @@ void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
   mfem::Vector rhs(12), affine_coeffs(12);
   for (int i = 0; i < 4; i++)
   {
-    A(3*i,0) = A(3*i+1,4) = A(3*i+2, 8)  = donor_pts[i][0];
-    A(3*i,1) = A(3*i+1,5) = A(3*i+2, 9)  = donor_pts[i][1];
-    A(3*i,2) = A(3*i+1,6) = A(3*i+2, 10) = donor_pts[i][2];
-    A(3*i,3) = A(3*i+1,7) = A(3*i+2, 11) = 1.0;
-    rhs[3*i+0] = receiver_pts[i][0];
-    rhs[3*i+1] = receiver_pts[i][1];
-    rhs[3*i+2] = receiver_pts[i][2];
-  }
-  Mpi::Print("Donor pts matrix:\n");
-  A.Print();
-  Mpi::Print("Receiver pts RHS:\n");
-  rhs.Print();
-  A.Invert(); // Invert in place
+    A(3 * i, 0) = A(3 * i + 1, 4) = A(3 * i + 2, 8) = donor_pts[i][0];
+    A(3 * i, 1) = A(3 * i + 1, 5) = A(3 * i + 2, 9) = donor_pts[i][1];
+    A(3 * i, 2) = A(3 * i + 1, 6) = A(3 * i + 2, 10) = donor_pts[i][2];
+    A(3 * i, 3) = A(3 * i + 1, 7) = A(3 * i + 2, 11) = 1.0;
+    rhs[3 * i + 0] = receiver_pts[i][0];
+    rhs[3 * i + 1] = receiver_pts[i][1];
+    rhs[3 * i + 2] = receiver_pts[i][2];
+  }
+
   // coeffs = A^-1 rhs
+  A.Invert();  // Invert in place
   A.Mult(rhs, affine_coeffs);
-  Mpi::Print("affine coeffs:\n");
-  affine_coeffs.Print();
+
   // Build affine transformation matrix
   transformation = 0.0;
   for (int i = 0; i < 3; i++)
   {
     for (int j = 0; j < 4; j++)
     {
-      transformation(i,j) = affine_coeffs[i*4+j];
+      transformation(i, j) = affine_coeffs[i * 4 + j];
     }
   }
-  transformation(3,3) = 1.0;
-  Mpi::Print("Affine transform matrix:\n");
-  transformation.Print();
+  transformation(3, 3) = 1.0;
 }
 
-void ComputeRotation(const mfem::Vector &normal1,
-                     const mfem::Vector &normal2,
+void ComputeRotation(const mfem::Vector &normal1, const mfem::Vector &normal2,
                      mfem::DenseMatrix &transformation)
 {
   // Calculate the rotation matrix between two vectors
@@ -2085,39 +1942,35 @@ void ComputeRotation(const mfem::Vector &normal1,
   mfem::Vector v(normal1.Size());
   normal1.cross3D(normal2, v);
   double s = v.Norml2();
-  double c = normal1*normal2;
+  double c = normal1 * normal2;
 
-  vx(0,1) = -v[2];
-  vx(0,2) = v[1];
-  vx(1,0) = v[2];
-  vx(1,2) = -v[0];
-  vx(2,0) = -v[1];
-  vx(2,1) = v[0];
+  vx(0, 1) = -v[2];
+  vx(0, 2) = v[1];
+  vx(1, 0) = v[2];
+  vx(1, 2) = -v[0];
+  vx(2, 0) = -v[1];
+  vx(2, 1) = v[0];
 
-  R(0,0) = R(1,1) = R(2,2) = 1.0;
+  R(0, 0) = R(1, 1) = R(2, 2) = 1.0;
   R += vx;
   Mult(vx, vx, vx2);
-  vx2.Set(1.0/(1.0+c), vx2);
+  vx2.Set(1.0 / (1.0 + c), vx2);
   R += vx2;
-  vx.Print();
-  Mpi::Print("R\n");
-  R.Print();
 
-  for(int i = 0; i < 3; i++)
+  for (int i = 0; i < 3; i++)
   {
-    for(int j = 0; j < 3; j++)
+    for (int j = 0; j < 3; j++)
     {
-      transformation(i,j) = R(i,j);
+      transformation(i, j) = R(i, j);
     }
   }
 }
 
-std::vector<int> CreatePeriodicVertexMapping(
-  std::unique_ptr<mfem::Mesh> &mesh,
-  const std::unordered_set<int> &donor_v,
-  const std::unordered_set<int> &receiver_v,
-  const mfem::DenseMatrix &transform,
-  double tol = 1e-6)
+std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
+                                             const std::unordered_set<int> &donor_v,
+                                             const std::unordered_set<int> &receiver_v,
+                                             const mfem::DenseMatrix &transform,
+                                             double tol = 1e-6)
 {
   const int sdim = mesh->SpaceDimension();
 
@@ -2128,11 +1981,23 @@ std::vector<int> CreatePeriodicVertexMapping(
   std::unordered_map<int, int> replica2primary;
 
   // KD-tree containing all the receiver points
-  std::unique_ptr<mfem::KDTreeBase<int,double>> kdtree;
-  if (sdim == 1) { kdtree.reset(new mfem::KDTree1D); }
-  else if (sdim == 2) { kdtree.reset(new mfem::KDTree2D); }
-  else if (sdim == 3) { kdtree.reset(new mfem::KDTree3D); }
-  else { MFEM_ABORT("Invalid space dimension."); }
+  std::unique_ptr<mfem::KDTreeBase<int, double>> kdtree;
+  if (sdim == 1)
+  {
+    kdtree.reset(new mfem::KDTree1D);
+  }
+  else if (sdim == 2)
+  {
+    kdtree.reset(new mfem::KDTree2D);
+  }
+  else if (sdim == 3)
+  {
+    kdtree.reset(new mfem::KDTree3D);
+  }
+  else
+  {
+    MFEM_ABORT("Invalid space dimension.");
+  }
 
   // Add all receiver points to KD-tree
   for (const int v : receiver_v)
@@ -2150,23 +2015,28 @@ std::vector<int> CreatePeriodicVertexMapping(
     at.MakeRef(receiver_coord, 0);
 
     coord = mesh->GetVertex(vi);
-    Mpi::Print("Mapping donor point: {:d} ({:.3e}, {:.3e}, {:.3e})", vi, donor_coord[0], donor_coord[1], donor_coord[2]);
-    // Apply transformation
-    // receiver = transform * donor
+    // Mpi::Print("Mapping donor point: {:d} ({:.3e}, {:.3e}, {:.3e})", vi, donor_coord[0],
+    //            donor_coord[1], donor_coord[2]);
+    // Apply the transformation to the donor point:
+    //   receiver = transform * donor
     transform.Mult(donor_coord, receiver_coord);
 
     const int vj = kdtree->FindClosestPoint(at.GetData());
     coord = mesh->GetVertex(vj);
     dx = at;
     dx -= coord;
-    Mpi::Print(" to receiver point: {:d} ({:.3e}, {:.3e}, {:.3e}), with transform error {:.3e}\n", vj, receiver_coord[0], receiver_coord[1], receiver_coord[2], dx.Norml2());
+    // Mpi::Print(" to receiver point: {:d} ({:.3e}, {:.3e}, {:.3e}), with transform error
+    // {:.3e}\n", vj, receiver_coord[0], receiver_coord[1], receiver_coord[2], dx.Norml2());
 
-    MFEM_VERIFY(dx.Norml2() < tol, "Could not match points on periodic boundaries, transformed donor point does not correspond to a receive point.");
-
-    MFEM_VERIFY(replica2primary.find(vj) == replica2primary.end(), "Could not match points on periodic boundaries, multiple donor points map to the same receiver point.")
+    MFEM_VERIFY(dx.Norml2() < tol,
+                "Could not match points on periodic boundaries, "
+                "transformed donor point does not correspond to a receive point.");
+    MFEM_VERIFY(
+        replica2primary.find(vj) == replica2primary.end(),
+        "Could not match points on "
+        "periodic boundaries, multiple donor points map to the same receiver point.")
 
     replica2primary[vj] = vi;
-
   }
 
   std::vector<int> v2v(mesh->GetNV());
@@ -2239,7 +2109,7 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
     {
       // Identify donor and receiver vertices
       const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
-      double norm_tol = 1e-6; //?
+      double norm_tol = 1e-6;  //?
       const int sdim = periodic_mesh->SpaceDimension();
       mfem::Vector coord(sdim);
       std::unordered_set<int> bdr_v_donor, bdr_v_receiver;
@@ -2258,25 +2128,25 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
           {
             has_tets = true;
           }
-          //Mpi::Print("attr: {:d}, donor: {:d}, receiver: {:d}\n", attr, donor, receiver);
-          if (donor) bdr_e_donor.insert(be);
-          if (receiver) bdr_e_receiver.insert(be);
+
+          if (donor)
+            bdr_e_donor.insert(be);
+          if (receiver)
+            bdr_e_receiver.insert(be);
           mfem::Array<int> vertidxs;
-          //int f, o;
-          //periodic_mesh->GetBdrElementFace(be, &f, &o);
-          //periodic_mesh->GetFaceVertices(f, vertidxs);
-          //Mpi::Print("f: {:d}, o: {:d}\n", f, o);
           periodic_mesh->GetBdrElementVertices(be, vertidxs);
           for (int i = 0; i < vertidxs.Size(); i++)
           {
             coord = periodic_mesh->GetVertex(vertidxs[i]);
-            if (donor) bdr_v_donor.insert(vertidxs[i]);
-            else if (receiver) bdr_v_receiver.insert(vertidxs[i]);
+            if (donor)
+              bdr_v_donor.insert(vertidxs[i]);
+            else if (receiver)
+              bdr_v_receiver.insert(vertidxs[i]);
           }
         }
       }
       const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
-      Mpi::Print("Total number of elements: {:d}\n",periodic_mesh->GetNE());
+      Mpi::Print("Total number of elements: {:d}\n", periodic_mesh->GetNE());
       Mpi::Print("Number of periodic BC elements: {:d}\n", num_periodic_bc_elems);
       // How to check if the mesh is OK?
       // Count number of elems in the periodic direction?
@@ -2293,19 +2163,19 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       {
         // Pure tet mesh
         MFEM_VERIFY(periodic_mesh->GetNE() > 3 * num_periodic_bc_elems,
-        "Not enough mesh elements in periodic direction!");
+                    "Not enough mesh elements in periodic direction!");
       }
       else if (geoms.Size() > 1 && has_tets)
       {
         // Mixed mesh
         MFEM_VERIFY(periodic_mesh->GetNE() > num_periodic_bc_elems,
-        "Not enough mesh elements in periodic direction!");
+                    "Not enough mesh elements in periodic direction!");
       }
       else
       {
         // No tets
         MFEM_VERIFY(periodic_mesh->GetNE() > num_periodic_bc_elems,
-        "Not enough mesh elements in periodic direction!");
+                    "Not enough mesh elements in periodic direction!");
       }
       /**/
       mfem::DenseMatrix transformation(4);
@@ -2316,26 +2186,27 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
       mfem::Vector translation(data.translation.size());
       std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
       mfem::Vector affine_vec(data.affine_transform.size());
-      std::copy(data.affine_transform.begin(), data.affine_transform.end(), affine_vec.GetData());
+      std::copy(data.affine_transform.begin(), data.affine_transform.end(),
+                affine_vec.GetData());
 
-      if (translation.Norml2() > 1e-12) // which value to use?
+      if (translation.Norml2() > 1e-12)  // which value to use?
       {
         // use user-provided translation
         for (int i = 0; i < 3; i++)
         {
-          transformation(i,i) = 1.0;
-          transformation(i,3) = translation[i];
+          transformation(i, i) = 1.0;
+          transformation(i, 3) = translation[i];
         }
-        transformation(3,3) = 1.0;
+        transformation(3, 3) = 1.0;
       }
-      else if (affine_vec.Norml2() > 1e-12) // which value to use?
+      else if (affine_vec.Norml2() > 1e-12)  // which value to use?
       {
         // use affine transformation matrix
         for (int i = 0; i < 4; i++)
         {
           for (int j = 0; j < 4; j++)
           {
-            transformation(i,j) = affine_vec[i*4+j];
+            transformation(i, j) = affine_vec[i * 4 + j];
           }
         }
       }
@@ -2345,39 +2216,36 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
         mfem::Vector donor_centroid(sdim), receiver_centroid(sdim);
         mfem::Vector translation2(sdim);
         mfem::Vector donor_normal(sdim), receiver_normal(sdim);
-        donor_normal = receiver_normal = 0.0;
 
         double donor_dia, receiver_dia, diameter;
-        Mpi::Print("num donor/receiver pts {:d}, {:d}\n",bdr_v_donor.size(), bdr_v_receiver.size());
-        MFEM_VERIFY(bdr_v_donor.size() == bdr_v_receiver.size(), "Different number of vertices on donor and receiver boundaries. Cannot create periodic mesh.");
+        MFEM_VERIFY(
+            bdr_v_donor.size() == bdr_v_receiver.size(),
+            "Different number of "
+            "vertices on donor and receiver boundaries. Cannot create periodic mesh.");
         ComputeCentroid(periodic_mesh, bdr_v_donor, donor_centroid, donor_dia);
-        Mpi::Print("Donor centroid: {:.3e}, {:.3e}, {:.3e}\n", donor_centroid[0], donor_centroid[1], donor_centroid[2]);
         ComputeCentroid(periodic_mesh, bdr_v_receiver, receiver_centroid, receiver_dia);
-        Mpi::Print("Receiver centroid: {:.3e}, {:.3e}, {:.3e}\n", receiver_centroid[0], receiver_centroid[1], receiver_centroid[2]);
+
         translation2 = receiver_centroid;
         translation2 -= donor_centroid;
-        Mpi::Print("computed translation: {:.9e}, {:.9e}, {:.9e}\n", translation2[0], translation2[1], translation2[2]);
-        Mpi::Print("config translation: {:.9e}, {:.9e}, {:.9e}\n", data.translation[0], data.translation[1], data.translation[2]);
+        Mpi::Print("computed translation: {:.9e}, {:.9e}, {:.9e}\n", translation2[0],
+                   translation2[1], translation2[2]);
 
         diameter = std::max(donor_dia, receiver_dia);
         norm_tol = 1e-6 * diameter;
         // Compute normal so it points inside domain for donor and outside for receiver
-        //ComputeNormal(periodic_mesh, *bdr_e_donor.begin(), donor_normal, true, norm_tol);
-        //ComputeNormal(periodic_mesh, *bdr_e_receiver.begin(), receiver_normal, false, norm_tol);
-        //Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0], donor_normal[1], donor_normal[2]);
-        //Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0], receiver_normal[1], receiver_normal[2]);
-
-        // This one computes the average normal over the whole boundary
-        // and checks if the boundary is planar
         // If not planar, error out
-        donor_normal = ComputeNormal2(periodic_mesh, bdr_e_donor, true);
-        receiver_normal = ComputeNormal2(periodic_mesh, bdr_e_receiver, false);
-        Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0], donor_normal[1], donor_normal[2]);
-        Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0], receiver_normal[1], receiver_normal[2]);
+        donor_normal = ComputeNormal(periodic_mesh, bdr_e_donor, true);
+        receiver_normal = ComputeNormal(periodic_mesh, bdr_e_receiver, false);
+        Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0],
+                   donor_normal[1], donor_normal[2]);
+        Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0],
+                   receiver_normal[1], receiver_normal[2]);
 
         std::vector<mfem::Vector> donor_pts, receiver_pts;
-        FindUniquePoints(periodic_mesh, bdr_v_donor, donor_centroid, diameter, donor_pts, norm_tol);
-        FindUniquePoints(periodic_mesh, bdr_v_receiver, receiver_centroid, diameter, receiver_pts, norm_tol);
+        FindUniquePoints(periodic_mesh, bdr_v_donor, donor_centroid, diameter, donor_pts,
+                         norm_tol);
+        FindUniquePoints(periodic_mesh, bdr_v_receiver, receiver_centroid, diameter,
+                         receiver_pts, norm_tol);
 
         // Add point offset from centroid in normal direction
         donor_centroid += donor_normal;
@@ -2388,44 +2256,36 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
         Mpi::Print("Number of unique donor pts: {:d}\n", donor_pts.size());
         Mpi::Print("Number of unique receiver pts: {:d}\n", receiver_pts.size());
 
-        MFEM_VERIFY(donor_pts.size() == receiver_pts.size(), "Different number of unique points on donor and receiver boundaries.");
+        MFEM_VERIFY(donor_pts.size() == receiver_pts.size(),
+                    "Different number of unique points on donor and receiver boundaries.");
 
-        if(donor_pts.size() == 4)
+        if (donor_pts.size() == 4)
         {
-          ComputeAffineTransformation(donor_pts, receiver_pts,
-                                      transformation);
-        }/*
-        else if (donor_pts.size() == 3)
-        {
-          ComputeTransformSVD(donor_pts, receiver_pts, transformation);
-        }*/
-        else /*if (donor_pts.size() == 2)*/
+          ComputeAffineTransformation(donor_pts, receiver_pts, transformation);
+        }
+        else
         {
           // Use normals to compute a rotation matrix
           ComputeRotation(donor_normal, receiver_normal, transformation);
 
           // Add centroids translation to transform matrix
-          transformation(0,3) = translation2[0];
-          transformation(1,3) = translation2[1];
-          transformation(2,3) = translation2[2];
-          transformation(3,3) = 1.0;
-          Mpi::Print("Affine transformation matrix\n");
-          transformation.Print();
+          transformation(0, 3) = translation2[0];
+          transformation(1, 3) = translation2[1];
+          transformation(2, 3) = translation2[2];
+          transformation(3, 3) = 1.0;
         }
-
+        Mpi::Print("Affine transformation matrix\n");
+        transformation.Print();
       }
       Mpi::Print("CreatePeriodicVertexMapping\n");
-      auto periodic_mapping = CreatePeriodicVertexMapping(periodic_mesh,
-                                                 bdr_v_donor,
-                                                 bdr_v_receiver,
-                                                 transformation,
-                                                 norm_tol);
-
-      //mfem::Vector translation(data.translation.size());
-      //std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
-      //auto periodic_mapping =
-      //    periodic_mesh->CreatePeriodicVertexMapping({translation2}, 1E-6);
-      //periodic_mesh->
+      auto periodic_mapping = CreatePeriodicVertexMapping(
+          periodic_mesh, bdr_v_donor, bdr_v_receiver, transformation, norm_tol);
+
+      // mfem::Vector translation(data.translation.size());
+      // std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
+      // auto periodic_mapping =
+      //     periodic_mesh->CreatePeriodicVertexMapping({translation2}, 1E-6);
+      // periodic_mesh->
       Mpi::Print("MFEM MakePeriodic\n");
       auto p_mesh = std::make_unique<mfem::Mesh>(
           mfem::Mesh::MakePeriodic(*periodic_mesh, periodic_mapping));

From 7dfcccc9144aca512827cf060cc1265738b0795e Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 3 Dec 2024 17:28:37 -0800
Subject: [PATCH 22/49] Warn against using div-free projection and Floquet BCs

---
 palace/drivers/eigensolver.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 86accd2d2..4bf8e89a0 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -172,15 +172,23 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // Construct a divergence-free projector so the eigenvalue solve is performed in the space
   // orthogonal to the zero eigenvalues of the stiffness matrix.
   std::unique_ptr<DivFreeSolver<ComplexVector>> divfree;
-  if (iodata.solver.linear.divfree_max_it > 0 and !PF)
+  if (iodata.solver.linear.divfree_max_it > 0)
   {
-    Mpi::Print(" Configuring divergence-free projection\n");
-    constexpr int divfree_verbose = 0;
-    divfree = std::make_unique<DivFreeSolver<ComplexVector>>(
+    if (PF)
+    {
+      Mpi::Warning("Divergence-free projection is not compatible with non-zero "
+                   "Floquet wave vector!\n");
+    }
+    else
+    {
+      Mpi::Print(" Configuring divergence-free projection\n");
+      constexpr int divfree_verbose = 0;
+      divfree = std::make_unique<DivFreeSolver<ComplexVector>>(
         space_op.GetMaterialOp(), space_op.GetNDSpace(), space_op.GetH1Spaces(),
         space_op.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol,
         iodata.solver.linear.divfree_max_it, divfree_verbose);
-    eigen->SetDivFreeProjector(*divfree);
+      eigen->SetDivFreeProjector(*divfree);
+    }
   }
 
   // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is

From e687ab9ff79424347fbf46a9304178df96f33c75 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 3 Dec 2024 17:29:10 -0800
Subject: [PATCH 23/49] Combine periodic terms in a single helper function

---
 palace/models/spaceoperator.cpp | 23 ++++++++++++++---------
 palace/models/spaceoperator.hpp |  3 +++
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index fa5f8ce6f..0fadbbf0e 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -489,9 +489,7 @@ SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy)
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
   MaterialPropertyCoefficient fpm(mat_op.MaxCeedAttribute()),
       fpwc(mat_op.MaxCeedAttribute()), fpc(mat_op.MaxCeedAttribute());
-  periodic_op.AddRealMassCoefficients(1.0, fpm);
-  periodic_op.AddWeakCurlCoefficients(1.0, fpwc);
-  periodic_op.AddCurlCoefficients(-1.0, fpc);
+  AddPeriodicCoefficients(1.0, fpm, fpwc, fpc);
   int empty[2] = {(fpm.empty()), (fpwc.empty() && fpc.empty())};
   Mpi::GlobalMin(2, empty, GetComm());
   if (empty[0] && empty[1])
@@ -834,9 +832,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddImagMassCoefficients(a2, fi);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
-    periodic_op.AddRealMassCoefficients(1.0, fmpr);
-    periodic_op.AddWeakCurlCoefficients(1.0, fpwi);
-    periodic_op.AddCurlCoefficients(-1.0, fpi);
+    AddPeriodicCoefficients(1.0, fmpr, fpwi, fpi);
     int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
                      fpwr.empty() && fpr.empty() && fmpr.empty()),
                     (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty() &&
@@ -853,6 +849,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     {
       bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpi, &fpwi, &fpi,
                                  skip_zeros, assemble_q_data);
+      //periodic_op.AddImagMassCoefficients(7.0, fi);//test - helps in some cases
       bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fmpi, &fpwi, &fpi,
                                         &skip_zeros, assemble_q_data);
     }
@@ -870,9 +867,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr);
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr);
-    periodic_op.AddRealMassCoefficients(1.0, fmpr);
-    periodic_op.AddWeakCurlCoefficients(1.0, fpwr);
-    periodic_op.AddCurlCoefficients(-1.0, fpr);
+    AddPeriodicCoefficients(1.0, fmpr, fpwr, fpr);
     int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() && fmpr.empty() &&
                  fpwr.empty() && fpr.empty());
     Mpi::GlobalMin(1, &empty, GetComm());
@@ -1013,6 +1008,16 @@ void SpaceOperator::AddExtraSystemBdrCoefficients(double omega,
   wave_port_op.AddExtraSystemBdrCoefficients(omega, fbr, fbi);
 }
 
+void SpaceOperator::AddPeriodicCoefficients(double coeff,
+                                            MaterialPropertyCoefficient &fm,
+                                            MaterialPropertyCoefficient &fwc,
+                                            MaterialPropertyCoefficient &fc)
+{
+  periodic_op.AddRealMassCoefficients(coeff, fm);
+  periodic_op.AddWeakCurlCoefficients(coeff, fwc);
+  periodic_op.AddCurlCoefficients(-coeff, fc);
+}
+
 bool SpaceOperator::GetExcitationVector(Vector &RHS)
 {
   // Time domain excitation vector.
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index c6b7b17cd..90cc2004a 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -80,6 +80,9 @@ class SpaceOperator
                                      MaterialPropertyCoefficient &dfbi,
                                      MaterialPropertyCoefficient &fbr,
                                      MaterialPropertyCoefficient &fbi);
+  void AddPeriodicCoefficients(double coeff, MaterialPropertyCoefficient &fm,
+                               MaterialPropertyCoefficient &fwc,
+                               MaterialPropertyCoefficient &fc);
 
   // Helper functions for excitation vector assembly.
   bool AddExcitationVector1Internal(Vector &RHS);

From 967c6e6e1dba6c7461852dd6e5cca98c4e652cb5 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 3 Dec 2024 17:29:57 -0800
Subject: [PATCH 24/49] Test term to help aux space smoother

---
 palace/models/periodicboundaryoperator.cpp | 34 ++++++++++++++++++++++
 palace/models/periodicboundaryoperator.hpp |  3 +-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index 600e6a0ce..097022d26 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -73,6 +73,19 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   wave_vector_cross(1, 2) = -wave_vector[0];
   wave_vector_cross(2, 0) = -wave_vector[1];
   wave_vector_cross(2, 1) = wave_vector[0];
+
+  // Test for preconditioning
+  Mpi::Print("wave vector after clipping:\n");
+  wave_vector.Print();
+  const double wave_vector_norm2 = pow(wave_vector.Norml2(), 2);
+  Mpi::Print("wave vector norml2: {:.3e}\n", wave_vector_norm2);
+  wave_vector_diag.SetSize(3);
+  wave_vector_diag = 0.0;
+  wave_vector_diag(0, 0) = wave_vector_norm2;
+  wave_vector_diag(1, 1) = wave_vector_norm2;
+  wave_vector_diag(2, 2) = wave_vector_norm2;
+  //Mpi::Print("wave vector diag:\n");
+  //wave_vector_diag.Print();
 }
 
 mfem::Array<int>
@@ -208,4 +221,25 @@ void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
   }
 }
 
+void PeriodicBoundaryOperator::AddImagMassCoefficients(double coeff,
+                                                       MaterialPropertyCoefficient &f)
+{
+  if (non_zero_wave_vector)
+  {
+    // 1/mu [k x]
+    mfem::DenseTensor kx(mat_op.GetInvPermeability().SizeI(),
+                        mat_op.GetInvPermeability().SizeJ(),
+                        mat_op.GetInvPermeability().SizeK());
+    for (int k = 0; k < kx.SizeK(); k++)
+    {
+      kx(k) = wave_vector_diag;
+    }
+    mfem::DenseTensor muinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
+    MaterialPropertyCoefficient muinvkx_func(mat_op.GetAttributeToMaterial(),
+                                             muinvkx);
+    f.AddCoefficient(muinvkx_func.GetAttributeToMaterial(),
+                     muinvkx_func.GetMaterialProperties(), coeff);
+  }
+}
+
 }  // namespace palace
diff --git a/palace/models/periodicboundaryoperator.hpp b/palace/models/periodicboundaryoperator.hpp
index 6205ec284..208e210c1 100644
--- a/palace/models/periodicboundaryoperator.hpp
+++ b/palace/models/periodicboundaryoperator.hpp
@@ -29,7 +29,7 @@ class PeriodicBoundaryOperator
   mfem::Vector wave_vector;
 
   // Matrix representation of cross product with the wave vector.
-  mfem::DenseMatrix wave_vector_cross;
+  mfem::DenseMatrix wave_vector_cross, wave_vector_diag; //test
 
   // Check if the wave vector is zero to bypass additional terms.
   bool non_zero_wave_vector;
@@ -47,6 +47,7 @@ class PeriodicBoundaryOperator
   void AddRealMassCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddWeakCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
+  void AddImagMassCoefficients(double coeff, MaterialPropertyCoefficient &f);
 };
 
 }  // namespace palace

From 12ab759b93892fef61a1ef5faf7b47028b9323c5 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 5 Dec 2024 16:48:28 -0800
Subject: [PATCH 25/49] Test Floquet terms in divergence free projection

---
 palace/drivers/eigensolver.cpp  |  4 +-
 palace/linalg/divfree.cpp       |  4 +-
 palace/linalg/divfree.hpp       |  3 +-
 palace/models/spaceoperator.cpp | 80 +++++++++++++++------------------
 palace/models/spaceoperator.hpp |  2 +
 5 files changed, 44 insertions(+), 49 deletions(-)

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 4bf8e89a0..96df99996 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -174,7 +174,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   std::unique_ptr<DivFreeSolver<ComplexVector>> divfree;
   if (iodata.solver.linear.divfree_max_it > 0)
   {
-    if (PF)
+    if (PF) //BYPASS?!?!?! OR FIND WAY TO MAKE IT WORK?
     {
       Mpi::Warning("Divergence-free projection is not compatible with non-zero "
                    "Floquet wave vector!\n");
@@ -184,7 +184,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
       Mpi::Print(" Configuring divergence-free projection\n");
       constexpr int divfree_verbose = 0;
       divfree = std::make_unique<DivFreeSolver<ComplexVector>>(
-        space_op.GetMaterialOp(), space_op.GetNDSpace(), space_op.GetH1Spaces(),
+        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(), space_op.GetH1Spaces(),
         space_op.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol,
         iodata.solver.linear.divfree_max_it, divfree_verbose);
       eigen->SetDivFreeProjector(*divfree);
diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index 3468c03bc..9cc765e1a 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -13,6 +13,7 @@
 #include "linalg/iterative.hpp"
 #include "linalg/rap.hpp"
 #include "models/materialoperator.hpp"
+#include "models/periodicboundaryoperator.hpp"
 #include "utils/timer.hpp"
 
 namespace palace
@@ -43,7 +44,7 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&a,
 
 template <typename VecType>
 DivFreeSolver<VecType>::DivFreeSolver(
-    const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
+    const MaterialOperator &mat_op, PeriodicBoundaryOperator &periodic_op, FiniteElementSpace &nd_fespace,
     FiniteElementSpaceHierarchy &h1_fespaces,
     const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists, double tol, int max_it,
     int print)
@@ -85,6 +86,7 @@ DivFreeSolver<VecType>::DivFreeSolver(
   // Create the mass and weak divergence operators for divergence-free projection.
   MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
+  //periodic_op.AddRealMassCoefficients(-1.0/(0.3144*0.3144), epsilon_func);
   {
     constexpr bool skip_zeros = false;
     BilinearForm m(h1_fespaces.GetFinestFESpace());
diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp
index b538553db..2bfc69ee5 100644
--- a/palace/linalg/divfree.hpp
+++ b/palace/linalg/divfree.hpp
@@ -24,6 +24,7 @@ namespace palace
 class FiniteElementSpaceHierarchy;
 class FiniteElementSpace;
 class MaterialOperator;
+class PeriodicBoundaryOperator;
 
 //
 // This solver implements a projection onto a divergence-free space satisfying Gᵀ M x = 0,
@@ -55,7 +56,7 @@ class DivFreeSolver
   mutable VecType psi, rhs;
 
 public:
-  DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
+  DivFreeSolver(const MaterialOperator &mat_op, PeriodicBoundaryOperator &periodic_op, FiniteElementSpace &nd_fespace,
                 FiniteElementSpaceHierarchy &h1_fespaces,
                 const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists, double tol,
                 int max_it, int print);
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 0fadbbf0e..84a4c790c 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -207,7 +207,6 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
                     const MaterialPropertyCoefficient *f,
                     const MaterialPropertyCoefficient *dfb,
                     const MaterialPropertyCoefficient *fb,
-                    const MaterialPropertyCoefficient *fpm,
                     const MaterialPropertyCoefficient *fpw,
                     const MaterialPropertyCoefficient *fp, bool assemble_q_data = false)
 {
@@ -241,10 +240,6 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
       a.AddBoundaryIntegrator<VectorFEMassIntegrator>(*fb);
     }
   }
-  if (fpm && !fpm->empty())
-  {
-    a.AddDomainIntegrator<VectorFEMassIntegrator>(*fpm);
-  }
   if (fpw && !fpw->empty())
   {
     a.AddDomainIntegrator<MixedVectorWeakCurlIntegrator>(*fpw);
@@ -261,7 +256,6 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
 
 void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
                        const MaterialPropertyCoefficient *fb,
-                       const MaterialPropertyCoefficient *fpm,
                        const MaterialPropertyCoefficient *fpw,
                        const MaterialPropertyCoefficient *fp, bool assemble_q_data = false)
 {
@@ -273,10 +267,6 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   {
     a.AddBoundaryIntegrator<DiffusionIntegrator>(*fb);
   }
-  if (fpm && !fpm->empty())
-  {
-    a.AddDomainIntegrator<DiffusionIntegrator>(*fpm);
-  }
   if (fpw && !fpw->empty())
   {
     // a.AddDomainIntegrator<DiffusionIntegrator>(*fpw);
@@ -294,37 +284,36 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
 auto AssembleOperator(
     const FiniteElementSpace &fespace, const MaterialPropertyCoefficient *df,
     const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
-    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpm,
-    const MaterialPropertyCoefficient *fpw, const MaterialPropertyCoefficient *fp,
+    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpw,
+    const MaterialPropertyCoefficient *fp,
     bool skip_zeros = false, bool assemble_q_data = false)
 {
   BilinearForm a(fespace);
-  AddIntegrators(a, df, f, dfb, fb, fpm, fpw, fp, assemble_q_data);
+  AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
   return a.Assemble(skip_zeros);
 }
 
 auto AssembleOperators(
     const FiniteElementSpaceHierarchy &fespaces, const MaterialPropertyCoefficient *df,
     const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
-    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpm,
-    const MaterialPropertyCoefficient *fpw, const MaterialPropertyCoefficient *fp,
+    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpw,
+    const MaterialPropertyCoefficient *fp,
     bool skip_zeros = false, bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
-  AddIntegrators(a, df, f, dfb, fb, fpm, fpw, fp, assemble_q_data);
+  AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
 auto AssembleAuxOperators(const FiniteElementSpaceHierarchy &fespaces,
                           const MaterialPropertyCoefficient *f,
                           const MaterialPropertyCoefficient *fb,
-                          const MaterialPropertyCoefficient *fpm,
                           const MaterialPropertyCoefficient *fpw,
                           const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
                           bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
-  AddAuxIntegrators(a, f, fb, fpm, fpw, fp, assemble_q_data);
+  AddAuxIntegrators(a, f, fb, fpw, fp, assemble_q_data);
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
@@ -346,7 +335,7 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr, nullptr,
+  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr,
                             skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -378,7 +367,7 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr, nullptr,
+  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr,
                             nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -416,12 +405,12 @@ std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy
   std::unique_ptr<Operator> mr, mi;
   if (!empty[0])
   {
-    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr, nullptr,
+    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr,
                           nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr, nullptr,
+    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr,
                           nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
@@ -458,12 +447,12 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   std::unique_ptr<Operator> ar, ai;
   if (!empty[0])
   {
-    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr, nullptr,
+    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr,
                           nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr, nullptr,
+    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr,
                           nullptr, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
@@ -487,10 +476,10 @@ std::unique_ptr<OperType>
 SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fpm(mat_op.MaxCeedAttribute()),
-      fpwc(mat_op.MaxCeedAttribute()), fpc(mat_op.MaxCeedAttribute());
-  AddPeriodicCoefficients(1.0, fpm, fpwc, fpc);
-  int empty[2] = {(fpm.empty()), (fpwc.empty() && fpc.empty())};
+  MaterialPropertyCoefficient fm(mat_op.MaxCeedAttribute()),
+      fwc(mat_op.MaxCeedAttribute()), fc(mat_op.MaxCeedAttribute());
+  AddPeriodicCoefficients(1.0, fm, fwc, fc);
+  int empty[2] = {(fm.empty()), (fwc.empty() && fc.empty())};
   Mpi::GlobalMin(2, empty, GetComm());
   if (empty[0] && empty[1])
   {
@@ -500,13 +489,13 @@ SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy)
   std::unique_ptr<Operator> pr, pi;
   if (!empty[0])
   {
-    pr = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fpm, nullptr,
+    pr = AssembleOperator(GetNDSpace(), nullptr, &fm, nullptr, nullptr, nullptr,
                           nullptr, skip_zeros);
   }
   if (!empty[1])
   {
-    pi = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, nullptr, &fpwc,
-                          &fpc, skip_zeros);
+    pi = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fwc,
+                          &fc, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -822,35 +811,35 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()),
         fbi(mat_op.MaxCeedBdrAttribute()), fpi(mat_op.MaxCeedAttribute()),
         fpwi(mat_op.MaxCeedAttribute()), fpr(mat_op.MaxCeedAttribute()),
-        fpwr(mat_op.MaxCeedAttribute()), fmpr(mat_op.MaxCeedAttribute()),
-        fmpi(mat_op.MaxCeedAttribute());
+        fpwr(mat_op.MaxCeedAttribute());
     AddStiffnessCoefficients(a0, dfr, fr);
     AddStiffnessBdrCoefficients(a0, fbr);
     AddDampingCoefficients(a1, fi);
     AddDampingBdrCoefficients(a1, fbi);
     AddRealMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr);
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
+    Mpi::Print("Complex PC with a2: {:.3e}\n", a2);
     AddImagMassCoefficients(a2, fi);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
-    AddPeriodicCoefficients(1.0, fmpr, fpwi, fpi);
+    AddPeriodicCoefficients(1.0, fr, fpwi, fpi);
     int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
-                     fpwr.empty() && fpr.empty() && fmpr.empty()),
+                     fpwr.empty() && fpr.empty()),
                     (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty() &&
-                     fpwi.empty() && fpi.empty() && fmpi.empty())};
+                     fpwi.empty() && fpi.empty())};
     Mpi::GlobalMin(2, empty, GetComm());
     if (!empty[0])
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr,
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr,
                                  skip_zeros, assemble_q_data);
-      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr,
+      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fpwr, &fpr,
                                         skip_zeros, assemble_q_data);
     }
     if (!empty[1])
     {
-      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fmpi, &fpwi, &fpi,
+      bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fpwi, &fpi,
                                  skip_zeros, assemble_q_data);
       //periodic_op.AddImagMassCoefficients(7.0, fi);//test - helps in some cases
-      bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fmpi, &fpwi, &fpi,
+      bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fpwi, &fpi,
                                         &skip_zeros, assemble_q_data);
     }
   }
@@ -859,7 +848,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
         fr(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
         fbr(mat_op.MaxCeedBdrAttribute()), fpwr(mat_op.MaxCeedAttribute()),
-        fpr(mat_op.MaxCeedAttribute()), fmpr(mat_op.MaxCeedAttribute());
+        fpr(mat_op.MaxCeedAttribute());
     AddStiffnessCoefficients(a0, dfr, fr);
     AddStiffnessBdrCoefficients(a0, fbr);
     AddDampingCoefficients(a1, fr);
@@ -867,15 +856,15 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr);
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr);
-    AddPeriodicCoefficients(1.0, fmpr, fpwr, fpr);
-    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() && fmpr.empty() &&
+    AddPeriodicCoefficients(1.0, fr, fpwr, fpr);
+    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
                  fpwr.empty() && fpr.empty());
     Mpi::GlobalMin(1, &empty, GetComm());
     if (!empty)
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fmpr, &fpwr, &fpr,
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr,  &fpwr, &fpr,
                                  skip_zeros, assemble_q_data);
-      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fmpr, &fpwr, &fpr,
+      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fpwr, &fpr,
                                         skip_zeros, assemble_q_data);
     }
   }
@@ -1013,6 +1002,7 @@ void SpaceOperator::AddPeriodicCoefficients(double coeff,
                                             MaterialPropertyCoefficient &fwc,
                                             MaterialPropertyCoefficient &fc)
 {
+  // Floquet periodicity contributions.
   periodic_op.AddRealMassCoefficients(coeff, fm);
   periodic_op.AddWeakCurlCoefficients(coeff, fwc);
   periodic_op.AddCurlCoefficients(-coeff, fc);
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 90cc2004a..98cbcb10f 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -116,9 +116,11 @@ class SpaceOperator
   auto &GetLumpedPortOp() { return lumped_port_op; }
   auto &GetWavePortOp() { return wave_port_op; }
   auto &GetSurfaceCurrentOp() { return surf_j_op; }
+  auto &GetPeriodicOp() { return periodic_op; }
   const auto &GetLumpedPortOp() const { return lumped_port_op; }
   const auto &GetWavePortOp() const { return wave_port_op; }
   const auto &GetSurfaceCurrentOp() const { return surf_j_op; }
+  const auto &GetPeriodicOp() const { return periodic_op; }
 
   // Return the parallel finite element space objects.
   auto &GetNDSpaces() { return nd_fespaces; }

From e713a6f4fab3ad81c42496ea6affd168953a5b9e Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 6 Dec 2024 11:17:51 -0800
Subject: [PATCH 26/49] Clean and reorganize automated periodic boundary
 matching

---
 palace/utils/geodata.cpp | 521 ++++++++++++++++++---------------------
 1 file changed, 246 insertions(+), 275 deletions(-)

diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index 33f0cc965..5f1360a21 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1721,35 +1721,29 @@ double RebalanceMesh(std::unique_ptr<mfem::ParMesh> &mesh, const IoData &iodata)
 namespace
 {
 
-void ComputeCentroid(std::unique_ptr<mfem::Mesh> &mesh,
-                     const std::unordered_set<int> &vertidxs, mfem::Vector &centroid,
-                     double &diameter)
+// Compute the centroid of a set of vertices.
+mfem::Vector ComputeCentroid(std::unique_ptr<mfem::Mesh> &mesh,
+                             const std::unordered_set<int> &vertidxs)
 {
-  int sdim = mesh->SpaceDimension();
-  mfem::Vector xMax(sdim), xMin(sdim), xDiff(sdim);
+  const int sdim = mesh->SpaceDimension();
+  mfem::Vector centroid(sdim);
   centroid = 0.0;
   for (const int v : vertidxs)
   {
     mfem::Vector coord(mesh->GetVertex(v), 3);
     centroid += coord;
-    for (int j = 0; j < sdim; j++)
-    {
-      xMax[j] = std::max(xMax[j], coord[j]);
-      xMin[j] = std::min(xMin[j], coord[j]);
-    }
   }
   centroid /= (double)vertidxs.size();
-
-  xDiff = xMax;
-  xDiff -= xMin;
-  diameter = xDiff.Norml2();  // mesh diameter
+  return centroid;
 }
 
+// Compute the normal vector for a set of elements. If "inside" is true, normal will
+// point inside the mesh, otherwise it will point outside the mesh.
 mfem::Vector ComputeNormal(std::unique_ptr<mfem::Mesh> &mesh,
                            const std::unordered_set<int> &elem_set, bool inside,
                            bool check_planar = true)
 {
-  int sdim = mesh->SpaceDimension();
+  const int sdim = mesh->SpaceDimension();
   mfem::IsoparametricTransformation T;
   mfem::Vector loc_normal(sdim), normal(sdim);
   normal = 0.0;
@@ -1757,16 +1751,16 @@ mfem::Vector ComputeNormal(std::unique_ptr<mfem::Mesh> &mesh,
 
   auto UpdateNormal = [&](int el, mfem::ElementTransformation &T)
   {
-    // Compute normal
+    // Compute normal.
     const mfem::IntegrationPoint &ip = mfem::Geometries.GetCenter(T.GetGeometryType());
     T.SetIntPoint(&ip);
     mfem::CalcOrtho(T.Jacobian(), loc_normal);
 
-    // Normalize it
+    // Normalize it.
     loc_normal /= loc_normal.Norml2();
 
-    // To find if the normal is pointing inside or outside the mesh
-    // we compare the boundary element position to its adjacement element
+    // To find if the normal is pointing inside or outside the mesh,
+    // we compare the boundary element position to its adjacent element.
     mfem::Array<int> vert_bdr;
     mesh->GetBdrElementVertices(el, vert_bdr);
     mfem::Vector bdr_elem_center(sdim), adj_elem_center(sdim);
@@ -1787,9 +1781,6 @@ mfem::Vector ComputeNormal(std::unique_ptr<mfem::Mesh> &mesh,
     bdr_elem_offset_p += loc_normal;
     bdr_elem_offset_n = bdr_elem_center;
     bdr_elem_offset_n -= loc_normal;
-    // Mpi::Print("dist_n: {:.3e}, dist_p: {:.3e}\n",
-    // adj_elem_center.DistanceTo(bdr_elem_offset_n),
-    // adj_elem_center.DistanceTo(bdr_elem_offset_p));
     if (inside && (adj_elem_center.DistanceTo(bdr_elem_offset_n) <
                    adj_elem_center.DistanceTo(bdr_elem_offset_p)))
     {
@@ -1802,14 +1793,14 @@ mfem::Vector ComputeNormal(std::unique_ptr<mfem::Mesh> &mesh,
     }
 
     // Check if the boundary is planar by comparing the current elem's
-    // normal to the average normal (accumulated so far)
+    // normal to the average normal (accumulated so far).
     if (count > 0 && check_planar)
     {
       mfem::Vector diff(sdim);
       diff = normal;
       diff /= count;
       diff -= loc_normal;
-      MFEM_VERIFY(diff.Norml2() < 1e-6,
+      MFEM_VERIFY(diff.Norml2() < 1e-8,
                   "Periodic boundary mapping is only supported for planar boundaries.");
     }
     normal += loc_normal;
@@ -1826,86 +1817,84 @@ mfem::Vector ComputeNormal(std::unique_ptr<mfem::Mesh> &mesh,
   return normal;
 }
 
-void FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
-                      const std::unordered_set<int> &vertidxs, const mfem::Vector &centroid,
-                      const double &diameter, std::vector<mfem::Vector> &unique_pts,
-                      const double &norm_tol = 1e-6)
+// Identify up to four unique points within a set, returned in this order:
+// 1. The centroid of the set.
+// 2. The farthest point with a unique distance from the centroid.
+// 3. The next-farthest unique-distance point not collinear with points 1 and 2.
+// 4. The centroid offset by the normal vector (appended last, even if 2 or 3 is missing).
+std::vector<mfem::Vector> FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
+                                           const std::unordered_set<int> &vertidxs,
+                                           const mfem::Vector &centroid,
+                                           const mfem::Vector &normal,
+                                           const double &mesh_dim,
+                                           const double &tol = 1e-6)
 {
   const int sdim = mesh->SpaceDimension();
+  std::vector<mfem::Vector> unique_pts;
+
+  // For each point, compute its distance to the centroid.
   mfem::Vector coord(sdim);
-  std::unordered_map<int, std::unordered_set<int>> dist2points;
+  std::map<int, std::unordered_set<int>, std::greater<int>> dist2points;
   for (const int v : vertidxs)
   {
     coord = mesh->GetVertex(v);
     double dist = coord.DistanceTo(centroid);
-    // convert dist to integer to avoid floating differences
-    dist2points[std::round(dist / diameter * 1e8)].insert(v);
+    // Convert dist to integer to avoid floating point differences.
+    dist2points[std::round(dist / mesh_dim * 1e8)].insert(v);
   }
 
-  std::priority_queue<std::pair<int, int>, std::vector<std::pair<int, int>>,
-                      std::greater<std::pair<int, int>>>
-      q;
-  int k = 10;  // number of points to keep
-  int num_unique_dist = 0;
+  // Loop over the distances, points chosen have a unique distance and are not collinear.
+  // Centroid is always considered a unique point.
+  unique_pts.push_back(centroid);
+  mfem::Vector cross_product(sdim);
+  cross_product = 0.0;
   for (const auto &[dist, pts_set] : dist2points)
   {
-    // Only consider unique distances
-    if (pts_set.size() == 1)
+    // Only consider unique non-zero distances.
+    if (pts_set.size() == 1 && dist > 0)
     {
-      num_unique_dist++;
       int v = *pts_set.begin();
       coord = mesh->GetVertex(v);
-      if (q.size() < k)
-      {
-        q.push(std::pair<int, int>(dist, v));
-      }
-      else if (q.top().first < dist)
-      {
-        q.pop();
-        q.push(std::pair<int, int>(dist, v));
+      unique_pts.push_back(coord); // Add point.
+      // Once we have 3 points, check for collinearity.
+      if (unique_pts.size() == 3)
+      {
+        // v1 = P2 - P1, v2 = P3 - P1.
+        mfem::Vector v1(sdim), v2(sdim);
+        v1 = unique_pts[1];
+        v1 -= unique_pts[0];
+        v2 = unique_pts[2];
+        v2 -= unique_pts[0];
+        v1.cross3D(v2, cross_product);
+        // If cross product is ~0, points are collinear. Remove last point and continue loop.
+        if (cross_product.Norml2() < tol)
+        {
+          unique_pts.pop_back();
+        }
+        else
+        {
+          break;
+        }
       }
     }
   }
 
-  unique_pts.push_back(centroid);
-  mfem::Vector normal(sdim);
-  normal = 0.0;  //
+  // Add point offset from centroid in normal direction.
+  coord = centroid;
+  coord += normal;
+  unique_pts.push_back(coord);
 
-  while (q.size() > 0 and normal.Norml2() < norm_tol)
-  {
-    coord = mesh->GetVertex(q.top().second);
-    // Mpi::Print("pts: {:d}, x/y/z: {:.3e}, {:.3e}, {:.3e}, dist: {:d}\n", q.top().second,
-    // coord[0], coord[1], coord[2], q.top().first); Mpi::Print("dist2points.size():
-    // {:d}\n",dist2points[q.top().first].size());
-    q.pop();
-    unique_pts.push_back(coord);
-    if (unique_pts.size() == 3)
-    {
-      // v1 = P2 - P1, v2 = P3 - P1
-      mfem::Vector v1(sdim), v2(sdim);
-      v1 = unique_pts[1];
-      v1 -= unique_pts[0];
-      v2 = unique_pts[2];
-      v2 -= unique_pts[0];
-      v1.cross3D(v2, normal);
-      // Mpi::Print("q.size: {:d}, normal.linf: {:.3e}\n", q.size(), normal.Normlinf());
-      if (normal.Norml2() < norm_tol)
-      {
-        unique_pts.pop_back();
-      }
-    }
-  }
+  return unique_pts;
 }
 
+// Use 4 point pairs (donor, receiver) to compute the affine transformation matrix.
 void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
                                  const std::vector<mfem::Vector> &receiver_pts,
                                  mfem::DenseMatrix &transformation)
 {
-  // Use 4 point pairs (donor, receiver) to compute the affine
-  // transformation matrix
   mfem::DenseMatrix A(12);
   A = 0.0;
-  mfem::Vector rhs(12), affine_coeffs(12);
+  mfem::Vector rhs(12);
   for (int i = 0; i < 4; i++)
   {
     A(3 * i, 0) = A(3 * i + 1, 4) = A(3 * i + 2, 8) = donor_pts[i][0];
@@ -1917,26 +1906,25 @@ void ComputeAffineTransformation(const std::vector<mfem::Vector> &donor_pts,
     rhs[3 * i + 2] = receiver_pts[i][2];
   }
 
-  // coeffs = A^-1 rhs
-  A.Invert();  // Invert in place
-  A.Mult(rhs, affine_coeffs);
+  // Solve the linear system A * affine_coeffs = rhs; the solution overwrites rhs.
+  mfem::LinearSolve(A, rhs.GetData());
 
-  // Build affine transformation matrix
+  // Build affine transformation matrix.
   transformation = 0.0;
   for (int i = 0; i < 3; i++)
   {
     for (int j = 0; j < 4; j++)
     {
-      transformation(i, j) = affine_coeffs[i * 4 + j];
+      transformation(i, j) = rhs[i * 4 + j];
     }
   }
   transformation(3, 3) = 1.0;
 }
 
+// Calculate the rotation matrix between two vectors.
 void ComputeRotation(const mfem::Vector &normal1, const mfem::Vector &normal2,
                      mfem::DenseMatrix &transformation)
 {
-  // Calculate the rotation matrix between two vectors
   mfem::DenseMatrix R(3), vx(3), vx2(3);
 
   mfem::Vector v(normal1.Size());
@@ -1976,11 +1964,10 @@ std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
 
   mfem::Vector coord(sdim), at(sdim), dx(sdim);
 
-  // Similar to MFEM's CreatePeriodicVertexMapping
-  // maps from replica to primary vertex
+  // Similar to MFEM's CreatePeriodicVertexMapping, maps from replica to primary vertex.
   std::unordered_map<int, int> replica2primary;
 
-  // KD-tree containing all the receiver points
+  // KD-tree containing all the receiver points.
   std::unique_ptr<mfem::KDTreeBase<int, double>> kdtree;
   if (sdim == 1)
   {
@@ -1999,14 +1986,14 @@ std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
     MFEM_ABORT("Invalid space dimension.");
   }
 
-  // Add all receiver points to KD-tree
+  // Add all receiver points to KD-tree.
   for (const int v : receiver_v)
   {
     kdtree->AddPoint(mesh->GetVertex(v), v);
   }
   kdtree->Sort();
 
-  // Loop over donor points and find the corresponding receiver point
+  // Loop over donor points and find the corresponding receiver point.
   for (int vi : donor_v)
   {
     mfem::Vector donor_coord(4), receiver_coord(4);
@@ -2015,18 +2002,13 @@ std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
     at.MakeRef(receiver_coord, 0);
 
     coord = mesh->GetVertex(vi);
-    // Mpi::Print("Mapping donor point: {:d} ({:.3e}, {:.3e}, {:.3e})", vi, donor_coord[0],
-    // donor_coord[1], donor_coord[2]);
-    //  Apply transformation
-    //  receiver = transform * donor
+    // Apply transformation, receiver = transform * donor.
     transform.Mult(donor_coord, receiver_coord);
 
     const int vj = kdtree->FindClosestPoint(at.GetData());
     coord = mesh->GetVertex(vj);
     dx = at;
     dx -= coord;
-    // Mpi::Print(" to receiver point: {:d} ({:.3e}, {:.3e}, {:.3e}), with transform error
-    // {:.3e}\n", vj, receiver_coord[0], receiver_coord[1], receiver_coord[2], dx.Norml2());
 
     MFEM_VERIFY(dx.Norml2() < tol,
                 "Could not match points on periodic boundaries, "
@@ -2052,6 +2034,174 @@ std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
   return v2v;
 }
 
+std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
+                                                const struct palace::config::PeriodicData &data,
+                                                const double tol = 1e-8)
+{
+  // Get mesh dimensions, will be used to define a reasonable tolerance in mesh units.
+  const int sdim = mesh->SpaceDimension();
+  mfem::Vector bbmin, bbmax;
+  mesh->GetBoundingBox(bbmin, bbmax);
+  bbmax -= bbmin;
+  const double mesh_dim = bbmax.Norml2();
+  const double mesh_tol = tol * mesh_dim;
+
+  // Identify donor and receiver vertices and elements.
+  const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
+  mfem::Vector coord(sdim);
+  std::unordered_set<int> bdr_v_donor, bdr_v_receiver;
+  std::unordered_set<int> bdr_e_donor, bdr_e_receiver;
+  bool has_tets = false;
+  for (int be = 0; be < mesh->GetNBE(); be++)
+  {
+    int attr = mesh->GetBdrAttribute(be);
+    auto donor = std::find(da.begin(), da.end(), attr) != da.end();
+    auto receiver = std::find(ra.begin(), ra.end(), attr) != ra.end();
+    if (donor || receiver)
+    {
+      int el, info;
+      mesh->GetBdrElementAdjacentElement(be, el, info);
+      if (mesh->GetElementType(el) == mfem::Element::TETRAHEDRON)
+      {
+        has_tets = true;
+      }
+      if (donor)
+      {
+        bdr_e_donor.insert(be);
+      }
+      if (receiver)
+      {
+        bdr_e_receiver.insert(be);
+      }
+      mfem::Array<int> vertidxs;
+      mesh->GetBdrElementVertices(be, vertidxs);
+      for (int i = 0; i < vertidxs.Size(); i++)
+      {
+        coord = mesh->GetVertex(vertidxs[i]);
+        if (donor)
+        {
+          bdr_v_donor.insert(vertidxs[i]);
+        }
+        else if (receiver)
+        {
+          bdr_v_receiver.insert(vertidxs[i]);
+        }
+      }
+    }
+  }
+
+  MFEM_VERIFY(
+    bdr_v_donor.size() == bdr_v_receiver.size(),
+    "Different number of "
+    "vertices on donor and receiver boundaries. Cannot create periodic mesh.");
+
+  const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
+  Mpi::Print("Total number of elements: {:d}\n", mesh->GetNE());
+  Mpi::Print("Number of periodic BC elements: {:d}\n", num_periodic_bc_elems);
+  // TODO: How to check if the mesh is OK?
+  // Count number of elems in the periodic direction?
+  // If hex/prism: Count boundary elements on donor+receiver,
+  // if total NE = ndonorE+nReceiverE: not enough cells?
+  // If pure tet mesh NE = 3*(ndonorE+nreceiverE): not enough.
+  // Mixed mesh is trickier.
+  // TODO: Move this test somewhere else. It should also apply to meshes
+  // already created with periodicity.
+  mfem::Array<mfem::Geometry::Type> geoms;
+  mesh->GetGeometries(3, geoms);
+  if (geoms.Size() == 1 && geoms[0] == mfem::Geometry::TETRAHEDRON)
+  {
+    // Pure tet mesh.
+    MFEM_VERIFY(mesh->GetNE() > 3 * num_periodic_bc_elems,
+                "Not enough mesh elements in periodic direction!");
+  }
+  else if (geoms.Size() > 1 && has_tets)
+  {
+    // Mixed mesh (currently the same threshold as the no-tets case).
+    MFEM_VERIFY(mesh->GetNE() > num_periodic_bc_elems,
+                "Not enough mesh elements in periodic direction!");
+  }
+  else
+  {
+    // No tets.
+     MFEM_VERIFY(mesh->GetNE() > num_periodic_bc_elems,
+                 "Not enough mesh elements in periodic direction!");
+    }
+
+  // Determine the affine transformation between donor and receiver points.
+  // Use the translation vector or affine transformation matrix if provided
+  // in the config file, otherwise automatically detect the transformation.
+  mfem::DenseMatrix transformation(4);
+  mfem::Vector translation(data.translation.size());
+  std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
+  mfem::Vector affine_vec(data.affine_transform.size());
+  std::copy(data.affine_transform.begin(), data.affine_transform.end(),
+            affine_vec.GetData());
+
+  if (translation.Norml2() > mesh_tol)
+  {
+    // Use user-provided translation (unit diagonal plus translation column).
+    for (int i = 0; i < 3; i++)
+    {
+      transformation(i, i) = 1.0;
+      transformation(i, 3) = translation[i];
+    }
+    transformation(3, 3) = 1.0;
+  }
+  else if (affine_vec.Norml2() > mesh_tol)
+  {
+    // Use user-provided affine transformation matrix (16 entries, row-major 4x4).
+    for (int i = 0; i < 4; i++)
+    {
+      for (int j = 0; j < 4; j++)
+      {
+        transformation(i, j) = affine_vec[i * 4 + j];
+      }
+    }
+  }
+  else
+  {
+    // Automatically detect transformation.
+    // Compute the centroid for each boundary.
+    mfem::Vector donor_centroid, receiver_centroid;
+    donor_centroid = ComputeCentroid(mesh, bdr_v_donor);
+    receiver_centroid = ComputeCentroid(mesh, bdr_v_receiver);
+
+    // Compute the normal vector for each boundary (donor inward, receiver outward).
+    mfem::Vector donor_normal, receiver_normal;
+    donor_normal = ComputeNormal(mesh, bdr_e_donor, true);
+    receiver_normal = ComputeNormal(mesh, bdr_e_receiver, false);
+
+    // Compute a set of unique points for each boundary.
+    std::vector<mfem::Vector> donor_pts, receiver_pts;
+    donor_pts = FindUniquePoints(mesh, bdr_v_donor, donor_centroid, donor_normal, mesh_dim, mesh_tol);
+    receiver_pts = FindUniquePoints(mesh, bdr_v_receiver, receiver_centroid, receiver_normal, mesh_dim, mesh_tol);
+    MFEM_VERIFY(donor_pts.size() == receiver_pts.size(),
+                "Different number of unique points on donor and receiver boundaries.");
+
+    // With 4 pairs of matching points, compute the unique affine transformation.
+    // With < 4, cannot determine a unique transformation. We assume there is no
+    // rotation along the boundary normal direction, compute the rotation between
+    // the two normals and the translation between the two centroids.
+    if (donor_pts.size() == 4)
+    {
+      ComputeAffineTransformation(donor_pts, receiver_pts, transformation);
+    }
+    else
+    {
+      // Use normals to compute a rotation matrix.
+      ComputeRotation(donor_normal, receiver_normal, transformation);
+
+      // Add centroids translation to transform matrix.
+      transformation(0, 3) = receiver_centroid[0] - donor_centroid[0];
+      transformation(1, 3) = receiver_centroid[1] - donor_centroid[1];
+      transformation(2, 3) = receiver_centroid[2] - donor_centroid[2];
+      transformation(3, 3) = 1.0;
+    }
+  }
+
+  return CreatePeriodicVertexMapping(mesh, bdr_v_donor, bdr_v_receiver, transformation, mesh_tol);
+}
+
 std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_curvature,
                                      const config::BoundaryData &boundaries)
 {
@@ -2107,193 +2257,14 @@ std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_c
 
     for (const auto &data : boundaries.periodic)
     {
-      // Identify donor and receiver vertices
-      const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
-      double norm_tol = 1e-6;  //?
-      const int sdim = periodic_mesh->SpaceDimension();
-      mfem::Vector coord(sdim);
-      std::unordered_set<int> bdr_v_donor, bdr_v_receiver;
-      std::unordered_set<int> bdr_e_donor, bdr_e_receiver;
-      bool has_tets = false;
-      for (int be = 0; be < periodic_mesh->GetNBE(); be++)
-      {
-        int attr = periodic_mesh->GetBdrAttribute(be);
-        auto donor = std::find(da.begin(), da.end(), attr) != da.end();
-        auto receiver = std::find(ra.begin(), ra.end(), attr) != ra.end();
-        if (donor || receiver)
-        {
-          int el, info;
-          periodic_mesh->GetBdrElementAdjacentElement(be, el, info);
-          if (periodic_mesh->GetElementType(el) == mfem::Element::TETRAHEDRON)
-          {
-            has_tets = true;
-          }
-
-          if (donor)
-            bdr_e_donor.insert(be);
-          if (receiver)
-            bdr_e_receiver.insert(be);
-          mfem::Array<int> vertidxs;
-          periodic_mesh->GetBdrElementVertices(be, vertidxs);
-          for (int i = 0; i < vertidxs.Size(); i++)
-          {
-            coord = periodic_mesh->GetVertex(vertidxs[i]);
-            if (donor)
-              bdr_v_donor.insert(vertidxs[i]);
-            else if (receiver)
-              bdr_v_receiver.insert(vertidxs[i]);
-          }
-        }
-      }
-      const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
-      Mpi::Print("Total number of elements: {:d}\n", periodic_mesh->GetNE());
-      Mpi::Print("Number of periodic BC elements: {:d}\n", num_periodic_bc_elems);
-      // How to check if the mesh is OK?
-      // Count number of elems in the periodic direction?
-      // If hex/prism: Count boundary elements on donor+receiver,
-      // if total NE = ndonorE+nReceiverE: not enough cells?
-      // If pure tet mesh NE = 3*(ndonorE+nreceiverE): not enough
-      // Mixed mesh is trickier
-      // MOVE THIS TEST SOMEWHERE ELSE. IT SHOULD ALSO APPLY TO MESHES
-      // ALREADY CREATED WITH PERIODICITY!!!
-      /**/
-      mfem::Array<mfem::Geometry::Type> geoms;
-      periodic_mesh->GetGeometries(3, geoms);
-      if (geoms.Size() == 1 && geoms[0] == mfem::Geometry::TETRAHEDRON)
-      {
-        // Pure tet mesh
-        MFEM_VERIFY(periodic_mesh->GetNE() > 3 * num_periodic_bc_elems,
-                    "Not enough mesh elements in periodic direction!");
-      }
-      else if (geoms.Size() > 1 && has_tets)
-      {
-        // Mixed mesh
-        MFEM_VERIFY(periodic_mesh->GetNE() > num_periodic_bc_elems,
-                    "Not enough mesh elements in periodic direction!");
-      }
-      else
-      {
-        // No tets
-        MFEM_VERIFY(periodic_mesh->GetNE() > num_periodic_bc_elems,
-                    "Not enough mesh elements in periodic direction!");
-      }
-      /**/
-      mfem::DenseMatrix transformation(4);
-      // If only translation is provided -> use it
-      // If only affine transfomr is provided -> use it
-      // If both affine transform and translation are provided -> error or warning?
-      // If neither -> automatic detection
-      mfem::Vector translation(data.translation.size());
-      std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
-      mfem::Vector affine_vec(data.affine_transform.size());
-      std::copy(data.affine_transform.begin(), data.affine_transform.end(),
-                affine_vec.GetData());
-
-      if (translation.Norml2() > 1e-12)  // which value to use?
-      {
-        // use user-provided translation
-        for (int i = 0; i < 3; i++)
-        {
-          transformation(i, i) = 1.0;
-          transformation(i, 3) = translation[i];
-        }
-        transformation(3, 3) = 1.0;
-      }
-      else if (affine_vec.Norml2() > 1e-12)  // which value to use?
-      {
-        // use affine transformation matrix
-        for (int i = 0; i < 4; i++)
-        {
-          for (int j = 0; j < 4; j++)
-          {
-            transformation(i, j) = affine_vec[i * 4 + j];
-          }
-        }
-      }
-      else
-      {
-        // automatically detect transformation
-        mfem::Vector donor_centroid(sdim), receiver_centroid(sdim);
-        mfem::Vector translation2(sdim);
-        mfem::Vector donor_normal(sdim), receiver_normal(sdim);
-
-        double donor_dia, receiver_dia, diameter;
-        MFEM_VERIFY(
-            bdr_v_donor.size() == bdr_v_receiver.size(),
-            "Different number of "
-            "vertices on donor and receiver boundaries. Cannot create periodic mesh.");
-        ComputeCentroid(periodic_mesh, bdr_v_donor, donor_centroid, donor_dia);
-        ComputeCentroid(periodic_mesh, bdr_v_receiver, receiver_centroid, receiver_dia);
-
-        translation2 = receiver_centroid;
-        translation2 -= donor_centroid;
-        Mpi::Print("computed translation: {:.9e}, {:.9e}, {:.9e}\n", translation2[0],
-                   translation2[1], translation2[2]);
-
-        diameter = std::max(donor_dia, receiver_dia);
-        norm_tol = 1e-6 * diameter;
-        // Compute normal so it points inside domain for donor and outside for receiver
-        // If not planar, error out
-        donor_normal = ComputeNormal(periodic_mesh, bdr_e_donor, true);
-        receiver_normal = ComputeNormal(periodic_mesh, bdr_e_receiver, false);
-        Mpi::Print("Donor normal: {:.9e}, {:.9e}, {:.9e}\n", donor_normal[0],
-                   donor_normal[1], donor_normal[2]);
-        Mpi::Print("Receiver normal: {:.9e}, {:.9e}, {:.9e}\n", receiver_normal[0],
-                   receiver_normal[1], receiver_normal[2]);
-
-        std::vector<mfem::Vector> donor_pts, receiver_pts;
-        FindUniquePoints(periodic_mesh, bdr_v_donor, donor_centroid, diameter, donor_pts,
-                         norm_tol);
-        FindUniquePoints(periodic_mesh, bdr_v_receiver, receiver_centroid, diameter,
-                         receiver_pts, norm_tol);
-
-        // Add point offset from centroid in normal direction
-        donor_centroid += donor_normal;
-        receiver_centroid += receiver_normal;
-        donor_pts.push_back(donor_centroid);
-        receiver_pts.push_back(receiver_centroid);
-
-        Mpi::Print("Number of unique donor pts: {:d}\n", donor_pts.size());
-        Mpi::Print("Number of unique receiver pts: {:d}\n", receiver_pts.size());
-
-        MFEM_VERIFY(donor_pts.size() == receiver_pts.size(),
-                    "Different number of unique points on donor and receiver boundaries.");
-
-        if (donor_pts.size() == 4)
-        {
-          ComputeAffineTransformation(donor_pts, receiver_pts, transformation);
-        }
-        else
-        {
-          // Use normals to compute a rotation matrix
-          ComputeRotation(donor_normal, receiver_normal, transformation);
-
-          // Add centroids translation to transform matrix
-          transformation(0, 3) = translation2[0];
-          transformation(1, 3) = translation2[1];
-          transformation(2, 3) = translation2[2];
-          transformation(3, 3) = 1.0;
-        }
-        Mpi::Print("Affine transformation matrix\n");
-        transformation.Print();
-      }
-      Mpi::Print("CreatePeriodicVertexMapping\n");
-      auto periodic_mapping = CreatePeriodicVertexMapping(
-          periodic_mesh, bdr_v_donor, bdr_v_receiver, transformation, norm_tol);
-
-      // mfem::Vector translation(data.translation.size());
-      // std::copy(data.translation.begin(), data.translation.end(), translation.GetData());
-      // auto periodic_mapping =
-      //     periodic_mesh->CreatePeriodicVertexMapping({translation2}, 1E-6);
-      // periodic_mesh->
-      Mpi::Print("MFEM MakePeriodic\n");
+      auto periodic_mapping = DeterminePeriodicVertexMapping(periodic_mesh, data);
       auto p_mesh = std::make_unique<mfem::Mesh>(
           mfem::Mesh::MakePeriodic(*periodic_mesh, periodic_mapping));
       periodic_mesh = std::move(p_mesh);
     }
     mesh = std::move(periodic_mesh);
   }
-  Mpi::Print("Return mesh\n");
+
   return mesh;
 }
 

From b5ef9bd39afc6a7d61ee44279f2124a946f1d492 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 6 Dec 2024 11:18:07 -0800
Subject: [PATCH 27/49] Remove print

---
 palace/models/spaceoperator.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 84a4c790c..f0ea44db3 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -818,7 +818,6 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddDampingBdrCoefficients(a1, fbi);
     AddRealMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr);
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
-    Mpi::Print("Complex PC with a2: {:.3e}\n", a2);
     AddImagMassCoefficients(a2, fi);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
     AddPeriodicCoefficients(1.0, fr, fpwi, fpi);

From 9382889094092624e7c90e46831e6a4bed81e5f7 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 6 Dec 2024 14:50:30 -0800
Subject: [PATCH 28/49] Use SymmetricOperator

---
 palace/fem/bilinearform.cpp     | 24 ++++--------------------
 palace/fem/libceed/operator.cpp | 16 ++++------------
 2 files changed, 8 insertions(+), 32 deletions(-)

diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index 0f8f0708b..e589e1013 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -36,11 +36,8 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
   std::unique_ptr<ceed::Operator> op;
   if (&trial_fespace == &test_fespace)
   {
-    Mpi::Print("bilinearform.cpp creating non-symmetric op\n");
-    op = std::make_unique<ceed::/*Symmetric*/ Operator>(
-        test_fespace.GetVSize(),
-        // op = std::make_unique<ceed::SymmetricOperator>(test_fespace.GetVSize(),
-        trial_fespace.GetVSize());
+    op = std::make_unique<ceed::SymmetricOperator>(test_fespace.GetVSize(),
+                                                   trial_fespace.GetVSize());
   }
   else
   {
@@ -78,13 +75,7 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           integ->SetMapTypes(trial_map_type, test_map_type);
           integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
                           data.geom_data, data.geom_data_restr, &sub_op);
-          // Transpose operator.
-          CeedOperator sub_op_t;
-          integ->SetMapTypes(test_map_type, trial_map_type);
-          integ->Assemble(ceed, test_restr, trial_restr, test_basis, trial_basis,
-                          data.geom_data, data.geom_data_restr, &sub_op_t);
-          op->AddSubOperator(sub_op, sub_op_t);  // Sub-operator owned by ceed::Operator
-          // op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
+          op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
         }
       }
       else if (mfem::Geometry::Dimension[geom] == mesh.Dimension() - 1 &&
@@ -104,14 +95,7 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           integ->SetMapTypes(trial_map_type, test_map_type);
           integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
                           data.geom_data, data.geom_data_restr, &sub_op);
-
-          // Transpose operator.
-          CeedOperator sub_op_t;
-          integ->SetMapTypes(test_map_type, trial_map_type);
-          integ->Assemble(ceed, test_restr, trial_restr, test_basis, trial_basis,
-                          data.geom_data, data.geom_data_restr, &sub_op_t);
-          op->AddSubOperator(sub_op, sub_op_t);  // Sub-operator owned by ceed::Operator
-          // op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
+          op->AddSubOperator(sub_op);  // Sub-operator owned by ceed::Operator
         }
       }
     }
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index d5688aeab..484757fab 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -546,10 +546,7 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
   };
 
   // Initialize the coarse operator.
-  Mpi::Print("ceed operator.cpp L533 Creating non-symmetric op_coarse\n");
-  auto op_coarse = std::make_unique</*Symmetric*/ Operator>(
-      fespace_coarse.GetVSize(),
-      // auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
+  auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
       fespace_coarse.GetVSize());
 
   // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
@@ -570,19 +567,14 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
       }
     }
     CeedInt nsub_ops_fine;
-    CeedOperator *sub_ops_fine, *sub_ops_fine_t;
+    CeedOperator *sub_ops_fine;
     PalaceCeedCall(ceed, CeedCompositeOperatorGetNumSub(op_fine[id], &nsub_ops_fine));
     PalaceCeedCall(ceed, CeedCompositeOperatorGetSubList(op_fine[id], &sub_ops_fine));
-    PalaceCeedCall(
-        ceed, CeedCompositeOperatorGetSubList(op_fine.GetTranspose(id), &sub_ops_fine_t));
     for (CeedInt k = 0; k < nsub_ops_fine; k++)
     {
-      CeedOperator sub_op_coarse, sub_op_coarse_t;
+      CeedOperator sub_op_coarse;
       SingleOperatorCoarsen(ceed, sub_ops_fine[k], &sub_op_coarse);
-      SingleOperatorCoarsen(ceed, sub_ops_fine_t[k], &sub_op_coarse_t);
-      op_coarse->AddSubOperator(sub_op_coarse,
-                                sub_op_coarse_t);  // Sub-operator owned by ceed::Operator
-      // op_coarse->AddSubOperator(sub_op_coarse);  // Sub-operator owned by ceed::Operator
+      op_coarse->AddSubOperator(sub_op_coarse);  // Sub-operator owned by ceed::Operator
     }
   }
 

From 6a2733375d96e77fa879e3f64e17a04759550633 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 6 Dec 2024 15:52:38 -0800
Subject: [PATCH 29/49] Add Floquet BC regression test

---
 examples/cylinder/floquet.json                | 69 +++++++++++++++++++
 .../ref/cylinder/floquet/domain-E.csv         | 16 +++++
 test/examples/ref/cylinder/floquet/eig.csv    | 16 +++++
 .../ref/cylinder/floquet/error-indicators.csv |  2 +
 test/examples/runtests.jl                     | 16 +++++
 5 files changed, 119 insertions(+)
 create mode 100644 examples/cylinder/floquet.json
 create mode 100644 test/examples/ref/cylinder/floquet/domain-E.csv
 create mode 100644 test/examples/ref/cylinder/floquet/eig.csv
 create mode 100644 test/examples/ref/cylinder/floquet/error-indicators.csv

diff --git a/examples/cylinder/floquet.json b/examples/cylinder/floquet.json
new file mode 100644
index 000000000..a1d0179a3
--- /dev/null
+++ b/examples/cylinder/floquet.json
@@ -0,0 +1,69 @@
+{
+  "Problem":
+  {
+    "Type": "Eigenmode",
+    "Verbose": 2,
+    "Output": "postpro/floquet"
+  },
+  "Model":
+  {
+    "Mesh": "mesh/cylinder_tet.msh",
+    "L0": 1.0e-2,  // cm
+  },
+  "Domains":
+  {
+    "Materials":
+    [
+      {
+        "Attributes": [1],
+        "Permeability": 1.0,
+        "Permittivity": 2.08,
+        "LossTan": 0.0004
+      }
+    ],
+    "Postprocessing":
+    {
+      "Energy":
+      [
+        {
+          "Index": 1,
+          "Attributes": [1]
+        }
+      ]
+    }
+  },
+  "Boundaries":
+  {
+    "Periodic":
+    [
+      {
+        "DonorAttributes": [2],
+        "ReceiverAttributes": [3],
+      }
+    ],
+    "FloquetWaveVector": [0.0, 0.0, 0.2],
+    "PEC":
+    {
+      "Attributes": [4]
+    }
+  },
+  "Solver":
+  {
+    "Order": 4,
+    "Device": "CPU",
+    "Eigenmode":
+    {
+      "N": 15,
+      "Tol": 1.0e-8,
+      "Target": 2.0,  // TE f111 ~ 2.9 GHz
+      "Save": 15
+    },
+    "Linear":
+    {
+      "Type": "Default",
+      "KSPType": "GMRES",
+      "Tol": 1.0e-8,
+      "MaxIts": 100
+    }
+  }
+}
diff --git a/test/examples/ref/cylinder/floquet/domain-E.csv b/test/examples/ref/cylinder/floquet/domain-E.csv
new file mode 100644
index 000000000..e9599ad66
--- /dev/null
+++ b/test/examples/ref/cylinder/floquet/domain-E.csv
@@ -0,0 +1,16 @@
+               m,              E_elec (J),               E_mag (J),               E_cap (J),               E_ind (J),           E_elec[1] (J),               p_elec[1],            E_mag[1] (J),                p_mag[1]
+ 1.000000000e+00,        +9.139656208e-02,        +8.396001243e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.396001243e-02,        +1.000000000e+00
+ 2.000000000e+00,        +9.139656208e-02,        +8.396001238e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.396001238e-02,        +1.000000000e+00
+ 3.000000000e+00,        +9.139656208e-02,        +8.259719511e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.259719511e-02,        +1.000000000e+00
+ 4.000000000e+00,        +9.139656208e-02,        +8.854647493e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.854647493e-02,        +1.000000000e+00
+ 5.000000000e+00,        +9.139656208e-02,        +8.854647490e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.854647490e-02,        +1.000000000e+00
+ 6.000000000e+00,        +9.139656208e-02,        +1.197882058e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.197882058e-01,        +1.000000000e+00
+ 7.000000000e+00,        +9.139656208e-02,        +1.197873702e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.197873702e-01,        +1.000000000e+00
+ 8.000000000e+00,        +9.139656208e-02,        +1.133419874e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.133419874e-01,        +1.000000000e+00
+ 9.000000000e+00,        +9.139656208e-02,        +8.776995268e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.776995268e-02,        +1.000000000e+00
+ 1.000000000e+01,        +9.139656208e-02,        +8.776992232e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.776992232e-02,        +1.000000000e+00
+ 1.100000000e+01,        +9.139656208e-02,        +8.956490411e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.956490411e-02,        +1.000000000e+00
+ 1.200000000e+01,        +9.139656208e-02,        +1.092859937e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.092859937e-01,        +1.000000000e+00
+ 1.300000000e+01,        +9.139656208e-02,        +1.092833156e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.092833156e-01,        +1.000000000e+00
+ 1.400000000e+01,        +9.139656208e-02,        +7.127501992e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +7.127501992e-02,        +1.000000000e+00
+ 1.500000000e+01,        +9.139656208e-02,        +7.127536201e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +7.127536201e-02,        +1.000000000e+00
diff --git a/test/examples/ref/cylinder/floquet/eig.csv b/test/examples/ref/cylinder/floquet/eig.csv
new file mode 100644
index 000000000..d5da15340
--- /dev/null
+++ b/test/examples/ref/cylinder/floquet/eig.csv
@@ -0,0 +1,16 @@
+               m,             Re{f} (GHz),             Im{f} (GHz),                       Q,           Error (Bkwd.),            Error (Abs.)
+ 1.000000000e+00,        +2.319626808e+00,        +4.639253185e-04,        +2.500000282e+03,        +1.999760958e-11,        +4.029717489e-09
+ 2.000000000e+00,        +2.319626808e+00,        +4.639253619e-04,        +2.500000049e+03,        +1.849595462e-11,        +3.727119060e-09
+ 3.000000000e+00,        +2.978290894e+00,        +5.956578804e-04,        +2.500001302e+03,        +2.234081359e-11,        +4.543546337e-09
+ 4.000000000e+00,        +3.746919744e+00,        +7.493842620e-04,        +2.499999005e+03,        +1.762537263e-10,        +3.633222123e-08
+ 5.000000000e+00,        +3.746919745e+00,        +7.493844540e-04,        +2.499998365e+03,        +1.669193404e-10,        +3.440806917e-08
+ 6.000000000e+00,        +3.840529872e+00,        +7.681060833e-04,        +2.499999696e+03,        +2.316277711e-11,        +4.783469321e-09
+ 7.000000000e+00,        +3.840598782e+00,        +7.681202233e-04,        +2.499998531e+03,        +2.767226121e-11,        +5.714754890e-09
+ 8.000000000e+00,        +4.270788392e+00,        +8.541575585e-04,        +2.500000401e+03,        +1.916990868e-11,        +3.994623301e-09
+ 9.000000000e+00,        +4.673907077e+00,        +9.347865640e-04,        +2.499986281e+03,        +1.251696254e-10,        +2.632395728e-08
+ 1.000000000e+01,        +4.673907889e+00,        +9.347818655e-04,        +2.499999280e+03,        +1.091467378e-10,        +2.295424397e-08
+ 1.100000000e+01,        +4.673916935e+00,        +9.347831502e-04,        +2.500000683e+03,        +4.778943984e-11,        +1.005042129e-08
+ 1.200000000e+01,        +4.838231281e+00,        +9.676461726e-04,        +2.500000266e+03,        +2.804442521e-11,        +5.921337718e-09
+ 1.300000000e+01,        +4.838639730e+00,        +9.677284116e-04,        +2.499998847e+03,        +4.467517065e-11,        +9.432870050e-09
+ 1.400000000e+01,        +4.978855360e+00,        +9.957705501e-04,        +2.500001360e+03,        +3.283860525e-11,        +6.957807276e-09
+ 1.500000000e+01,        +4.978915348e+00,        +9.957825426e-04,        +2.500001373e+03,        +3.336592586e-11,        +7.069546032e-09
diff --git a/test/examples/ref/cylinder/floquet/error-indicators.csv b/test/examples/ref/cylinder/floquet/error-indicators.csv
new file mode 100644
index 000000000..56f2053c1
--- /dev/null
+++ b/test/examples/ref/cylinder/floquet/error-indicators.csv
@@ -0,0 +1,2 @@
+                    Norm,                 Minimum,                 Maximum,                    Mean
+        +3.776351128e-03,        +7.708020807e-05,        +3.998012825e-04,        +2.006231775e-04
diff --git a/test/examples/runtests.jl b/test/examples/runtests.jl
index 65ddbadb6..fa3fe2635 100644
--- a/test/examples/runtests.jl
+++ b/test/examples/runtests.jl
@@ -26,6 +26,7 @@ else
         "cylinder/cavity_pec",
         "cylinder/cavity_impedance",
         "cylinder/waveguide",
+        "cylinder/floquet",
         "coaxial/open",
         "coaxial/matched",
         "cpw/lumped_uniform",
@@ -113,6 +114,21 @@ if "cylinder/waveguide" in cases
     )
 end
 
+if "cylinder/floquet" in cases
+    @info "Testing cylinder/floquet (periodic)..."
+    @time testcase(
+        "cylinder",
+        "floquet.json",
+        "floquet";
+        palace=palace,
+        np=numprocs,
+        rtol=reltol,
+        atol=abstol,
+        excluded_columns=["Maximum", "Minimum", "Mean", "Error (Bkwd.)", "Error (Abs.)"],
+        skip_rowcount=true
+    )
+end
+
 # Coarser test tolerances for driven simulations with ports
 reltol = 2.0e-2
 abstol = 2.0e-12

From bebcab652839b188610e9065e7d294300c58ecea Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 9 Dec 2024 15:16:22 -0800
Subject: [PATCH 30/49] Allow Floquet wave vector specification for meshes with
 and without built-in periodicity

---
 docs/src/config/boundaries.md              | 31 +++++++++++--
 docs/src/examples/cylinder.md              | 13 +++---
 docs/src/guide/boundaries.md               | 14 +++---
 examples/cylinder/floquet.json             |  2 +-
 palace/models/periodicboundaryoperator.cpp | 52 +++++++++++++++++-----
 palace/models/periodicboundaryoperator.hpp |  6 +--
 palace/utils/configfile.cpp                |  9 ++++
 palace/utils/configfile.hpp                |  8 +++-
 palace/utils/iodata.cpp                    | 11 ++++-
 9 files changed, 113 insertions(+), 33 deletions(-)

diff --git a/docs/src/config/boundaries.md b/docs/src/config/boundaries.md
index c373e7ce4..3100d87fb 100644
--- a/docs/src/config/boundaries.md
+++ b/docs/src/config/boundaries.md
@@ -125,7 +125,10 @@ electrostatic simulations. Entries of the capacitance matrix are extracted corre
 each terminal boundary.
 
 `"Periodic"` :  Array of objects for configuring periodic boundary conditions for surfaces
-with meshes that are identical after a specified translation.
+with meshes that are identical after translation and/or rotation.
+
+`"FloquetWaveVector"` :  Floating point array defining the Floquet wave vector for
+meshes generated with built-in periodicity.
 
 `"Postprocessing"` :  Top-level object for configuring boundary postprocessing.
 
@@ -527,7 +530,9 @@ boundary.
     {
         "DonorAttributes": [<int array>],
         "ReceiverAttributes": [<int array>],
-        "Translation": [<float array>]
+        "Translation": [<float array>],
+        "AffineTransformation": [<float array>],
+        "FloquetWaveVector": [<float array>]
     },
     ...
 ]
@@ -541,8 +546,26 @@ attributes for this periodic boundary.
 `"ReceiverAttributes" [None]` :  Integer array of the receiver attributes of the mesh boundary
 attributes for this periodic boundary.
 
-`"Translation" [None]` :  Defines the distance between the donor and receiver attributes in
-mesh units.
+`"Translation" [None]` :  Optional floating point array defining the distance between the donor
+and receiver attributes in mesh units. If neither `"Translation"` nor `"AffineTransformation"` is
+specified, the transformation between donor and receiver boundaries is automatically detected.
+
+`"AffineTransformation" [None]` :  Optional floating point array of size 16 defining the
+three-dimensional (4 x 4) affine transformation matrix (in row major format) between the donor
+and receiver attributes in mesh units. If neither `"Translation"` nor `"AffineTransformation"` is
+specified, the transformation between donor and receiver boundaries is automatically detected.
+
+`"FloquetWaveVector" [None]` :  Optional floating point array defining the phase delay between
+this pair of donor and receiver periodic boundaries in the X/Y/Z directions in radians per mesh
+unit. If multiple periodic boundary pairs are used, the Floquet wave vector will be summed over
+the periodic boundary pairs.
+
+## `boundaries["FloquetWaveVector"]`
+
+Optional floating point array defining the phase delay between the periodic boundaries in the X/Y/Z
+directions in radians per mesh unit, for meshes generated with built-in periodicity. This should not
+be used for non-periodic meshes, or for meshes generated without built-in periodicity. In the latter
+case, the Floquet wave vector should be specified via `boundaries["Periodic"]["FloquetWaveVector"]`.
 
 ## `boundaries["Postprocessing"]["SurfaceFlux"]`
 
diff --git a/docs/src/examples/cylinder.md b/docs/src/examples/cylinder.md
index b5d0507a8..7c08fcfd4 100644
--- a/docs/src/examples/cylinder.md
+++ b/docs/src/examples/cylinder.md
@@ -6,7 +6,7 @@
 # Eigenmodes of a Cylinder
 
 !!! note
-    
+
     The files for this example can be found in the
     [`examples/cylinder/`](https://github.com/awslabs/palace/blob/main/examples/cylinder)
     directory of the *Palace* source code.
@@ -254,15 +254,18 @@ such modes to higher frequencies. The relevant modes are tabulated as
 
 For this problem, we use curved tetrahedral elements from the mesh file
 [`mesh/cavity_tet.msh`](https://github.com/awslabs/palace/blob/main/examples/cylinder/mesh/cavity_tet.msh),
-and the configuration file
-[`waveguide.json`](https://github.com/awslabs/palace/blob/main/examples/cylinder/waveguide.json).
+and the configuration files
+[`waveguide.json`](https://github.com/awslabs/palace/blob/main/examples/cylinder/waveguide.json) and
+[`floquet.json`](https://github.com/awslabs/palace/blob/main/examples/cylinder/floquet.json).
 
-The main difference between this configuration file and those used in the cavity example is
+The main difference between these configuration files and those used in the cavity example is
 in the `"Boundaries"` object: `waveguide.json` specifies a perfect electric conductor
 (`"PEC"`) boundary condition for the exterior surface and a periodic boundary condition
 (`"Periodic"`) on the cross-sections of the cylinder (in the $z-$ direction). The periodic
 attribute pairs are defined by `"DonorAttributes"` and `"ReceiverAttributes"`, and the
-distance between them is given by the `"Translation"` vector in mesh units.
+distance between them is given by the `"Translation"` vector in mesh units. In `floquet.json`,
+an additional `"FloquetWaveVector"` specifies the phase delay between the donor and receiver
+boundaries in the X/Y/Z directions.
 
 The file `postpro/waveguide/eig.csv` contains information about the computed eigenfrequencies and
 associated quality factors:
diff --git a/docs/src/guide/boundaries.md b/docs/src/guide/boundaries.md
index e0ffdee48..edb4a3d2b 100644
--- a/docs/src/guide/boundaries.md
+++ b/docs/src/guide/boundaries.md
@@ -74,7 +74,7 @@ Periodic boundary conditions on an existing mesh can be specified using the
 ["Periodic"](../config/boundaries.md#boundaries%5B%22Periodic%22%5D) boundary keyword. This
 boundary condition enforces that the solution on the specified boundaries be exactly equal,
 and requires that the surface meshes on the donor and receiver boundaries be identical up to
-translation. Periodicity in *Palace* is also supported through meshes generated
+translation or rotation. Periodicity in *Palace* is also supported through meshes generated
 incorporating periodicity as part of the meshing process.
 
 ## Lumped and wave port excitation
@@ -83,16 +83,16 @@ incorporating periodicity as part of the meshing process.
     A lumped port applies a similar boundary condition to a
     [surface impedance](#Impedance-boundary) boundary, but takes on a special meaning for
     each simulation type.
-    
+
     For frequency domain driven simulations, ports are used to provide a lumped port
     excitation and postprocess voltages, currents, and scattering parameters. Likewise, for
     transient simulations, they perform a similar purpose but for time domain computed
     quantities.
-    
+
     For eigenmode simulations where there is no excitation, lumped ports are used to specify
     properties and postprocess energy-participation ratios (EPRs) corresponding to
     linearized circuit elements.
-    
+
     Note that a single lumped port (given by a single integer `"Index"`) can be made up of
     multiple boundary attributes in the mesh in order to model, for example, a multielement
     lumped port. To use this functionality, use the `"Elements"` object under
@@ -104,18 +104,18 @@ incorporating periodicity as part of the meshing process.
     shape which is computed by solving a 2D boundary mode eigenproblem on each wave port
     boundary. This allows for more accurate scattering parameter calculations when modeling
     waveguides or transmission lines with arbitrary cross sections.
-    
+
     The homogeneous Dirichlet boundary conditions for the wave port boundary mode analysis
     are taken from the `"PEC"` boundaries of the full 3D model, as well as any optional
     additional boundary attributes given under `"WavePortPEC"`. Any boundary of the wave
     port not labeled with with a PEC condition has the natural boundary condition for zero
     tangential magnetic field prescribed for the purpose of port mode calculation.
-    
+
     Unlike lumped ports, wave port boundaries cannot be defined internal to the
     computational domain and instead must exist only on the outer boundary of the domain
     (they are to be "one-sided" in the sense that mesh elements only exist on one side of
     the boundary).
-    
+
     Wave ports are not currently compatible with nonconformal mesh refinement.
 
 The incident field excitation at a lumped or wave port is controlled by setting
diff --git a/examples/cylinder/floquet.json b/examples/cylinder/floquet.json
index a1d0179a3..09671cbd9 100644
--- a/examples/cylinder/floquet.json
+++ b/examples/cylinder/floquet.json
@@ -39,9 +39,9 @@
       {
         "DonorAttributes": [2],
         "ReceiverAttributes": [3],
+        "FloquetWaveVector": [0.0, 0.0, 0.2]
       }
     ],
-    "FloquetWaveVector": [0.0, 0.0, 0.2],
     "PEC":
     {
       "Attributes": [4]
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index 097022d26..a37c4b920 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -26,28 +26,59 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
     std::sort(periodic_attr.begin(), periodic_attr.end());
     utils::PrettyPrint(periodic_attr);
   }
+  const int sdim = mesh.SpaceDimension();
+  const double tol = std::numeric_limits<double>::epsilon();
+
+  // Sum Floquet wave vector over periodic boundary pairs.
+  wave_vector.SetSize(sdim);
+  mfem::Vector local_wave_vector(sdim);
+  wave_vector = 0.0;
+  for (const auto &data : iodata.boundaries.periodic)
+  {
+    MFEM_VERIFY(data.wave_vector.size() == sdim,
+                "Floquet wave vector size must equal the spatial dimension.");
+    std::copy(data.wave_vector.begin(), data.wave_vector.end(), local_wave_vector.GetData());
+    wave_vector += local_wave_vector;
+  }
+  non_zero_wave_vector = (wave_vector.Norml2() > tol);
+
+  // Get Floquet wave vector specified outside of periodic boundary definitions.
   const auto &data = iodata.boundaries.floquet;
-  MFEM_VERIFY(data.wave_vector.size() == mesh.SpaceDimension(),
-              "Floquet/Bloch wave vector size must equal the spatial dimension.");
-  MFEM_VERIFY(mesh.SpaceDimension() == 3,
-              "Quasi-periodic Floquet periodic boundary conditions are only available "
-              " in 3D!");
-  wave_vector.SetSize(data.wave_vector.size());
-  std::copy(data.wave_vector.begin(), data.wave_vector.end(), wave_vector.GetData());
-  non_zero_wave_vector = (wave_vector.Norml2() > std::numeric_limits<double>::epsilon());
+  MFEM_VERIFY(data.wave_vector.size() == sdim,
+              "Floquet wave vector size must equal the spatial dimension.");
+  std::copy(data.wave_vector.begin(), data.wave_vector.end(), local_wave_vector.GetData());
+  if (non_zero_wave_vector && local_wave_vector.Norml2() > tol)
+  {
+    mfem::Vector diff(sdim);
+    diff = wave_vector;
+    diff -= local_wave_vector;
+    MFEM_VERIFY(diff.Norml2() < tol, "Conflicting definitions of the Floquet wave vector in the "
+                "configuration file.");
+    wave_vector = local_wave_vector;
+  }
+  else if (!non_zero_wave_vector)
+  {
+    wave_vector = local_wave_vector;
+    non_zero_wave_vector = (wave_vector.Norml2() > tol);
+  }
+
   MFEM_VERIFY(!non_zero_wave_vector ||
                   iodata.problem.type == config::ProblemData::Type::DRIVEN ||
                   iodata.problem.type == config::ProblemData::Type::EIGENMODE,
               "Quasi-periodic Floquet boundary conditions are only available for "
               " frequency domain driven or eigenmode simulations!");
 
+  MFEM_VERIFY(non_zero_wave_vector && sdim == 3,
+              "Quasi-periodic Floquet periodic boundary conditions are only available "
+              " in 3D!");
+
   // Get mesh dimensions in x/y/z coordinates
   mfem::Vector bbmin, bbmax;
   mesh::GetAxisAlignedBoundingBox(mesh, bbmin, bbmax);
   bbmax -= bbmin;
 
-  // Ensure Floquet wave vector components are in range [-π/L, π/L]
-  for (int i = 0; i < mesh.SpaceDimension(); i++)
+  // Ensure Floquet wave vector components are in range [-π/L, π/L].
+  for (int i = 0; i < sdim; i++)
   {
     if (wave_vector[i] > M_PI / bbmax[i])
     {
@@ -221,6 +252,7 @@ void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
   }
 }
 
+// TEST - REMOVE LATER!!!
 void PeriodicBoundaryOperator::AddImagMassCoefficients(double coeff,
                                                        MaterialPropertyCoefficient &f)
 {
diff --git a/palace/models/periodicboundaryoperator.hpp b/palace/models/periodicboundaryoperator.hpp
index 208e210c1..38dfa518a 100644
--- a/palace/models/periodicboundaryoperator.hpp
+++ b/palace/models/periodicboundaryoperator.hpp
@@ -25,11 +25,11 @@ class PeriodicBoundaryOperator
   // List of all periodic boundary condition attributes.
   mfem::Array<int> periodic_attr;
 
-  // Bloch wave vector for Floquet boundary conditions.
+  // Floquet/Bloch wave vector for Floquet boundary conditions.
   mfem::Vector wave_vector;
 
   // Matrix representation of cross product with the wave vector.
-  mfem::DenseMatrix wave_vector_cross, wave_vector_diag; //test
+  mfem::DenseMatrix wave_vector_cross, wave_vector_diag; //test - remove wave_vector_diag later!
 
   // Check if the wave vector is zero to bypass additional terms.
   bool non_zero_wave_vector;
@@ -47,7 +47,7 @@ class PeriodicBoundaryOperator
   void AddRealMassCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddWeakCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
-  void AddImagMassCoefficients(double coeff, MaterialPropertyCoefficient &f);
+  void AddImagMassCoefficients(double coeff, MaterialPropertyCoefficient &f); // test - remove later
 };
 
 }  // namespace palace
diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp
index 0e163a3d9..578d972ae 100644
--- a/palace/utils/configfile.cpp
+++ b/palace/utils/configfile.cpp
@@ -1092,12 +1092,20 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
           "\"AffineTransformation\" should specify an array in the configuration file!");
       data.affine_transform = trsfr->get<std::array<double, 16>>();
     }
+    auto floquet = it->find("FloquetWaveVector");
+    if (floquet != it->end())
+    {
+      MFEM_VERIFY(floquet->is_array(),
+                "\"FloquetWaveVector\" should specify an array in the configuration file!");
+      data.wave_vector = floquet->get<std::array<double, 3>>();
+    }
 
     // Cleanup
     it->erase("DonorAttributes");
     it->erase("ReceiverAttributes");
     it->erase("Translation");
     it->erase("AffineTransformation");
+    it->erase("FloquetWaveVector");
     MFEM_VERIFY(it->empty(),
                 "Found an unsupported configuration file keyword under \"Periodic\"!\n"
                     << it->dump(2));
@@ -1109,6 +1117,7 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
       std::cout << "ReceiverAttributes: " << data.receiver_attributes << '\n';
       std::cout << "Translation: " << data.translation << '\n';
       std::cout << "AffineTransformation: " << data.affine_transform << '\n';
+      std::cout << "FloquetWaveVector: " << data.wave_vector << '\n';
     }
   }
 }
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index 1f558b865..c47b9fa3c 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -472,13 +472,19 @@ struct PeriodicData
 public:
   // Vector defining the direction and distance for this periodic boundary condition.
   std::array<double, 3> translation = {0.0, 0.0, 0.0};
+
   // Vector defining the affine transformation matrix for this periodic boundary condition.
   std::array<double, 16> affine_transform = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                              0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+
   // List of boundary donor attributes for this periodic boundary condition.
   std::vector<int> donor_attributes = {};
+
   // List of boundary receiver attributes for this periodic boundary condition.
   std::vector<int> receiver_attributes = {};
+
+  // Floquet/Bloch wavevector specifying the phase delay in the X/Y/Z directions.
+  std::array<double, 3> wave_vector = {0.0, 0.0, 0.0};
 };
 
 struct PeriodicBoundaryData : public internal::DataVector<PeriodicData>
@@ -651,7 +657,7 @@ struct BoundaryData
   WavePortBoundaryData waveport = {};
   SurfaceCurrentBoundaryData current = {};
   PeriodicBoundaryData periodic = {};
-  FloquetData floquet;  //?
+  FloquetData floquet;
   BoundaryPostData postpro = {};
 
   void SetUp(json &config);
diff --git a/palace/utils/iodata.cpp b/palace/utils/iodata.cpp
index c497eaf6e..1627facfe 100644
--- a/palace/utils/iodata.cpp
+++ b/palace/utils/iodata.cpp
@@ -524,10 +524,17 @@ void IoData::NondimensionalizeInputs(mfem::ParMesh &mesh)
     data.Cs /= electromagnetics::epsilon0_ * Lc;
   }
 
-  // Floquet periodic boundaries
+  // Floquet periodic boundaries.
   for (int i = 0; i < boundaries.floquet.wave_vector.size(); i++)
   {
-    boundaries.floquet.wave_vector[i] /= 1.0 / GetMeshLengthScale();
+    boundaries.floquet.wave_vector[i] *= GetMeshLengthScale();
+  }
+  for (auto &data : boundaries.periodic)
+  {
+    for (int i = 0; i < data.wave_vector.size(); i++)
+    {
+      data.wave_vector[i] *= GetMeshLengthScale();
+    }
   }
 
   // Wave port offset distance.

From 1101ef9543e815eb3c255b4cf6267f16a230302a Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Mon, 9 Dec 2024 16:28:59 -0800
Subject: [PATCH 31/49] Fix error in VERIFY

---
 palace/models/periodicboundaryoperator.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index a37c4b920..3789f2a4d 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -67,8 +67,9 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
                   iodata.problem.type == config::ProblemData::Type::EIGENMODE,
               "Quasi-periodic Floquet boundary conditions are only available for "
               " frequency domain driven or eigenmode simulations!");
-
-  MFEM_VERIFY(non_zero_wave_vector && sdim == 3,
+  Mpi::Print("wave_vector.Norml2(): {:.3e}\n",wave_vector.Norml2());
+  wave_vector.Print();
+  MFEM_VERIFY(!non_zero_wave_vector || sdim == 3,
               "Quasi-periodic Floquet periodic boundary conditions are only available "
               " in 3D!");
 

From 0dd577dba6e76b861dbf3c83b4beac8742acd342 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 10 Dec 2024 12:12:02 -0800
Subject: [PATCH 32/49] Implement B field Floquet correction

---
 palace/drivers/drivensolver.cpp            |  2 +-
 palace/drivers/eigensolver.cpp             | 42 ++++++++++--
 palace/linalg/solver.cpp                   |  4 +-
 palace/models/periodicboundaryoperator.cpp | 26 +++++++-
 palace/models/periodicboundaryoperator.hpp |  1 +
 palace/models/postoperator.cpp             | 67 ++++++++++++++++++--
 palace/models/postoperator.hpp             |  6 +-
 palace/models/romoperator.cpp              |  2 +-
 palace/models/spaceoperator.cpp            | 74 +++++++++++++++++++++-
 palace/models/spaceoperator.hpp            | 12 ++--
 10 files changed, 210 insertions(+), 26 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index c6fee6b5d..2c737362e 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -117,7 +117,7 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega0, Operator::DIAG_ZERO);
-  auto PF = space_op.GetPeriodicMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto PF = space_op.GetFloquetMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   const auto &Curl = space_op.GetCurlMatrix();
 
   // Set up the linear solver and set operators for the first frequency step. The
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 96df99996..f72088d48 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -13,6 +13,7 @@
 #include "linalg/operator.hpp"
 #include "linalg/slepc.hpp"
 #include "linalg/vector.hpp"
+#include "linalg/jacobi.hpp"
 #include "models/lumpedportoperator.hpp"
 #include "models/postoperator.hpp"
 #include "models/spaceoperator.hpp"
@@ -36,7 +37,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   auto K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  auto PF = space_op.GetPeriodicMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto FP = space_op.GetFloquetMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(1.0, Operator::DIAG_ZERO);
   A2 = nullptr;
 
@@ -128,9 +129,9 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
                                           : EigenvalueSolver::ScaleType::NONE;
   if (C)
   {
-    if (PF)
+    if (FP)
     {
-      eigen->SetOperators(*K, *C, *M, *PF, scale);
+      eigen->SetOperators(*K, *C, *M, *FP, scale);
     }
     else
     {
@@ -139,9 +140,9 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   }
   else
   {
-    if (PF)
+    if (FP)
     {
-      eigen->SetOperators(*K, *M, *PF, scale);
+      eigen->SetOperators(*K, *M, *FP, scale);
     }
     else
     {
@@ -174,7 +175,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   std::unique_ptr<DivFreeSolver<ComplexVector>> divfree;
   if (iodata.solver.linear.divfree_max_it > 0)
   {
-    if (PF) //BYPASS?!?!?! OR FIND WAY TO MAKE IT WORK?
+    if (FP) //BYPASS?!?!?! OR FIND WAY TO MAKE IT WORK?
     {
       Mpi::Warning("Divergence-free projection is not compatible with non-zero "
                    "Floquet wave vector!\n");
@@ -268,7 +269,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * target,
                                     std::complex<double>(-target * target, 0.0), K.get(),
-                                    C.get(), M.get(), A2.get(), PF.get());
+                                    C.get(), M.get(), A2.get(), FP.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, target, -target * target,
                                                              target);
   auto ksp = std::make_unique<ComplexKspSolver>(iodata, space_op.GetNDSpaces(),
@@ -331,6 +332,33 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     eigen->GetEigenvector(i, E);
     Curl.Mult(E.Real(), B.Real());
     Curl.Mult(E.Imag(), B.Imag());
+    /**/
+    //  kp x E / omega, just a test, clean up later
+    if (FP)
+    {
+      auto FM = space_op.GetFloquetCorrectionMassMatrix<ComplexOperator>();
+      auto FC = space_op.GetFloquetCorrectionCrossMatrix<ComplexOperator>();
+      ComplexVector BF(Curl.Width()), RHS(Curl.Width());
+      BF.UseDevice(true);
+      RHS.UseDevice(true);
+
+      auto pcg = std::make_unique<CgSolver<ComplexOperator>>(space_op.GetComm(), 0);
+      pcg->SetInitialGuess(0);
+      pcg->SetRelTol(iodata.solver.linear.tol);
+      pcg->SetAbsTol(std::numeric_limits<double>::epsilon());
+      pcg->SetMaxIter(iodata.solver.linear.max_it);
+      auto jac = std::make_unique<JacobiSmoother<ComplexOperator>>(space_op.GetComm());
+      auto kspM = std::make_unique<ComplexKspSolver>(std::move(pcg), std::move(jac));
+      kspM->SetOperators(*FM, *FM);
+
+      // Floquet correction = kp x E / omega = FC * E / omega
+      FC->Mult(E, RHS);
+      kspM->Mult(RHS, BF);
+
+      BF *= -1.0 / omega;
+      post_op.SetBFGridFunction(BF);
+    }
+    /**/
     B *= -1.0 / (1i * omega);
     post_op.SetEGridFunction(E);
     post_op.SetBGridFunction(B);
diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index 780bffd34..6f49e2f17 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -52,8 +52,10 @@ void MfemWrapperSolver<ComplexOperator>::SetOperator(const ComplexOperator &op)
   }
   if (hAr && hAi)
   {
-    // A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
+    //Mpi::Print("Using real coarse solve\n");
+    //A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
     /**/
+    Mpi::Print("Using complex coarse solve\n");
     // A = [Ar, -Ai]
     //     [Ai,  Ar]
     mfem::Array2D<const mfem::HypreParMatrix *> blocks(2, 2);
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index 3789f2a4d..03e87389a 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -113,9 +113,9 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   Mpi::Print("wave vector norml2: {:.3e}\n", wave_vector_norm2);
   wave_vector_diag.SetSize(3);
   wave_vector_diag = 0.0;
-  wave_vector_diag(0, 0) = wave_vector_norm2;
-  wave_vector_diag(1, 1) = wave_vector_norm2;
-  wave_vector_diag(2, 2) = wave_vector_norm2;
+  wave_vector_diag(0, 0) = 1.0;//wave_vector_norm2;
+  wave_vector_diag(1, 1) = 1.0;//wave_vector_norm2;
+  wave_vector_diag(2, 2) = 1.0;//wave_vector_norm2;
   //Mpi::Print("wave vector diag:\n");
   //wave_vector_diag.Print();
 }
@@ -275,4 +275,24 @@ void PeriodicBoundaryOperator::AddImagMassCoefficients(double coeff,
   }
 }
 
+void PeriodicBoundaryOperator::AddFloquetCrossCoefficients(double coeff,
+                                                       MaterialPropertyCoefficient &f)
+{
+  if (non_zero_wave_vector)
+  {
+    // [k x]
+    mfem::DenseTensor kx(mat_op.GetInvPermeability().SizeI(),
+                         mat_op.GetInvPermeability().SizeJ(),
+                         mat_op.GetInvPermeability().SizeK());
+    for (int k = 0; k < kx.SizeK(); k++)
+    {
+      kx(k) = wave_vector_cross;
+      //kx(k) = wave_vector_diag;//test
+    }
+    MaterialPropertyCoefficient kx_func(mat_op.GetAttributeToMaterial(), kx);
+    f.AddCoefficient(kx_func.GetAttributeToMaterial(),
+                     kx_func.GetMaterialProperties(), coeff);
+  }
+}
+
 }  // namespace palace
diff --git a/palace/models/periodicboundaryoperator.hpp b/palace/models/periodicboundaryoperator.hpp
index 38dfa518a..a9ec508ba 100644
--- a/palace/models/periodicboundaryoperator.hpp
+++ b/palace/models/periodicboundaryoperator.hpp
@@ -48,6 +48,7 @@ class PeriodicBoundaryOperator
   void AddWeakCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddImagMassCoefficients(double coeff, MaterialPropertyCoefficient &f); // test - remove later
+  void AddFloquetCrossCoefficients(double coeff, MaterialPropertyCoefficient &f); // test
 };
 
 }  // namespace palace
diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp
index 2a584b6ba..c692f9ba4 100644
--- a/palace/models/postoperator.cpp
+++ b/palace/models/postoperator.cpp
@@ -48,6 +48,9 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &space_op,
     B(std::make_unique<GridFunction>(space_op.GetRTSpace(),
                                      iodata.problem.type !=
                                          config::ProblemData::Type::TRANSIENT)),
+    BF(std::make_unique<GridFunction>(space_op.GetNDSpace(),
+                                      iodata.problem.type !=
+                                         config::ProblemData::Type::TRANSIENT)),
     lumped_port_init(false), wave_port_init(false),
     paraview(CreateParaviewPath(iodata, name), &space_op.GetNDSpace().GetParMesh()),
     paraview_bdr(CreateParaviewPath(iodata, name) + "_boundary",
@@ -60,6 +63,7 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &space_op,
 
   E_sr = std::make_unique<BdrFieldVectorCoefficient>(E->Real());
   B_sr = std::make_unique<BdrFieldVectorCoefficient>(B->Real());
+  BF_sr = std::make_unique<BdrFieldVectorCoefficient>(BF->Real());
   J_sr = std::make_unique<BdrSurfaceCurrentVectorCoefficient>(B->Real(), mat_op);
   Q_sr = std::make_unique<BdrSurfaceFluxCoefficient<SurfaceFluxType::ELECTRIC>>(
       &E->Real(), nullptr, mat_op, true, mfem::Vector());
@@ -67,6 +71,7 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &space_op,
   {
     E_si = std::make_unique<BdrFieldVectorCoefficient>(E->Imag());
     B_si = std::make_unique<BdrFieldVectorCoefficient>(B->Imag());
+    BF_si = std::make_unique<BdrFieldVectorCoefficient>(BF->Imag());
     J_si = std::make_unique<BdrSurfaceCurrentVectorCoefficient>(B->Imag(), mat_op);
     Q_si = std::make_unique<BdrSurfaceFluxCoefficient<SurfaceFluxType::ELECTRIC>>(
         &E->Imag(), nullptr, mat_op, true, mfem::Vector());
@@ -201,6 +206,21 @@ void PostOperator::InitializeDataCollection(const IoData &iodata)
       paraview_bdr.RegisterVCoeffField("B", B_sr.get());
     }
   }
+  if (BF)
+  {
+    if (HasImag())
+    {
+      paraview.RegisterField("BF_real", &BF->Real());
+      paraview.RegisterField("BF_imag", &BF->Imag());
+      paraview_bdr.RegisterVCoeffField("BF_real", BF_sr.get());
+      paraview_bdr.RegisterVCoeffField("BF_imag", BF_si.get());
+    }
+    else
+    {
+      paraview.RegisterField("BF", &BF->Real());
+      paraview_bdr.RegisterVCoeffField("BF", BF_sr.get());
+    }
+  }
   if (V)
   {
     paraview.RegisterField("V", &V->Real());
@@ -281,6 +301,21 @@ void PostOperator::SetEGridFunction(const ComplexVector &e, bool exchange_face_n
   lumped_port_init = wave_port_init = false;
 }
 
+void PostOperator::SetBFGridFunction(const ComplexVector &bf, bool exchange_face_nbr_data)
+{
+  MFEM_VERIFY(HasImag(),
+              "SetBFGridFunction for complex-valued output called when HasImag() == false!");
+  MFEM_VERIFY(BF, "Incorrect usage of PostOperator::SetBFGridFunction!");
+  BF->Real().SetFromTrueDofs(bf.Real());  // Parallel distribute
+  BF->Imag().SetFromTrueDofs(bf.Imag());
+  if (exchange_face_nbr_data)
+  {
+    BF->Real().ExchangeFaceNbrData();  // Ready for parallel comm on shared faces
+    BF->Imag().ExchangeFaceNbrData();
+  }
+  has_floquet = true;
+}
+
 void PostOperator::SetBGridFunction(const ComplexVector &b, bool exchange_face_nbr_data)
 {
   MFEM_VERIFY(HasImag(),
@@ -669,7 +704,7 @@ namespace
 {
 
 template <typename T>
-void ScaleGridFunctions(double L, int dim, bool imag, T &E, T &B, T &V, T &A)
+void ScaleGridFunctions(double L, int dim, bool imag, T &E, T &B, T &V, T &A, T &BF)
 {
   // For fields on H(curl) and H(div) spaces, we "undo" the effect of redimensionalizing
   // the mesh which would carry into the fields during the mapping from reference to
@@ -702,6 +737,17 @@ void ScaleGridFunctions(double L, int dim, bool imag, T &E, T &B, T &V, T &A)
       B->Imag().FaceNbrData() *= Ld;
     }
   }
+  if (BF)
+  {
+    // Piola transform: J^-T
+    BF->Real() *= L;
+    BF->Real().FaceNbrData() *= L;
+    if (imag)
+    {
+      BF->Imag() *= L;
+      BF->Imag().FaceNbrData() *= L;
+    }
+  }
   if (A)
   {
     // Piola transform: J^-T
@@ -719,7 +765,7 @@ void PostOperator::WriteFields(int step, double time) const
   mfem::ParMesh &mesh =
       HasE() ? *E->ParFESpace()->GetParMesh() : *B->ParFESpace()->GetParMesh();
   mesh::DimensionalizeMesh(mesh, mesh_Lc0);
-  ScaleGridFunctions(mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A);
+  ScaleGridFunctions(mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A, BF);
   paraview.SetCycle(step);
   paraview.SetTime(time);
   paraview_bdr.SetCycle(step);
@@ -727,7 +773,7 @@ void PostOperator::WriteFields(int step, double time) const
   paraview.Save();
   paraview_bdr.Save();
   mesh::NondimensionalizeMesh(mesh, mesh_Lc0);
-  ScaleGridFunctions(1.0 / mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A);
+  ScaleGridFunctions(1.0 / mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A, BF);
 }
 
 void PostOperator::WriteFieldsFinal(const ErrorIndicator *indicator) const
@@ -814,7 +860,20 @@ std::vector<std::complex<double>> PostOperator::ProbeEField() const
 std::vector<std::complex<double>> PostOperator::ProbeBField() const
 {
   MFEM_VERIFY(B, "PostOperator is not configured for magnetic flux density probes!");
-  return interp_op.ProbeField(*B);
+  if (has_floquet)
+  {
+    auto probe_B = interp_op.ProbeField(*B);
+    auto probe_BF = interp_op.ProbeField(*BF);
+    for (int i = 0; i < probe_B.size(); i++)
+    {
+      probe_B[i] += probe_BF[i];
+    }
+    return probe_B;
+  }
+  else
+  {
+    return interp_op.ProbeField(*B);
+  }
 }
 
 }  // namespace palace
diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp
index 2522d815e..b0c09717e 100644
--- a/palace/models/postoperator.hpp
+++ b/palace/models/postoperator.hpp
@@ -44,9 +44,10 @@ class PostOperator
   const DomainPostOperator dom_post_op;
 
   // Objects for grid function postprocessing from the FE solution.
-  mutable std::unique_ptr<GridFunction> E, B, V, A;
-  std::unique_ptr<mfem::VectorCoefficient> S, E_sr, E_si, B_sr, B_si, A_s, J_sr, J_si;
+  mutable std::unique_ptr<GridFunction> E, B, V, A, BF;
+  std::unique_ptr<mfem::VectorCoefficient> S, E_sr, E_si, B_sr, B_si, A_s, J_sr, J_si, BF_sr, BF_si;
   std::unique_ptr<mfem::Coefficient> U_e, U_m, V_s, Q_sr, Q_si;
+  bool has_floquet = false;
 
   // Wave port boundary mode field postprocessing.
   struct WavePortFieldData
@@ -91,6 +92,7 @@ class PostOperator
   // nondimensionalized consistently (B ~ E (L₀ ω₀ E₀⁻¹)).
   void SetEGridFunction(const ComplexVector &e, bool exchange_face_nbr_data = true);
   void SetBGridFunction(const ComplexVector &b, bool exchange_face_nbr_data = true);
+  void SetBFGridFunction(const ComplexVector &b, bool exchange_face_nbr_data = true);
   void SetEGridFunction(const Vector &e, bool exchange_face_nbr_data = true);
   void SetBGridFunction(const Vector &b, bool exchange_face_nbr_data = true);
   void SetVGridFunction(const Vector &v, bool exchange_face_nbr_data = true);
diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp
index 889b83314..af8d1a8a3 100644
--- a/palace/models/romoperator.cpp
+++ b/palace/models/romoperator.cpp
@@ -196,7 +196,7 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &space_op, int max_
   K = space_op.GetStiffnessMatrix<ComplexOperator>(Operator::DIAG_ONE);
   C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
-  PF = space_op.GetPeriodicMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  PF = space_op.GetFloquetMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   MFEM_VERIFY(K && M, "Invalid empty HDM matrices when constructing PROM!");
 
   // Set up RHS vector (linear in frequency part) for the incident field at port boundaries,
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index f0ea44db3..04da291a3 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -473,7 +473,7 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
 
 template <typename OperType>
 std::unique_ptr<OperType>
-SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy)
+SpaceOperator::GetFloquetMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
   MaterialPropertyCoefficient fm(mat_op.MaxCeedAttribute()),
@@ -513,6 +513,65 @@ SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy)
   }
 }
 
+template <typename OperType>
+std::unique_ptr<OperType>
+SpaceOperator::GetFloquetCorrectionCrossMatrix()
+{
+  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
+  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
+  periodic_op.AddFloquetCrossCoefficients(1.0, f);
+  int empty = (f.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
+  {
+    return {};
+  }
+  constexpr bool skip_zeros = false;
+  std::unique_ptr<Operator> m;
+  if (!empty)
+  {
+    m = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, nullptr, nullptr,
+                          nullptr, skip_zeros);
+  }
+  if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  {
+    auto M =
+        std::make_unique<ComplexParOperator>(std::move(m), nullptr, GetNDSpace());
+    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return M;
+  }
+  else
+  {
+    auto M = std::make_unique<ParOperator>(std::move(m), GetNDSpace());
+    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return M;
+  }
+}
+
+template <typename OperType>
+std::unique_ptr<OperType>
+SpaceOperator::GetFloquetCorrectionMassMatrix()
+{
+  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
+  constexpr bool skip_zeros = false;
+  BilinearForm a(GetNDSpace());
+  a.AddDomainIntegrator<VectorFEMassIntegrator>();
+  std::unique_ptr<Operator> m = a.Assemble(skip_zeros);
+  if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  {
+    auto M =
+        std::make_unique<ComplexParOperator>(std::move(m), nullptr, GetNDSpace());
+    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return M;
+  }
+  else
+  {
+    auto M = std::make_unique<ParOperator>(std::move(m), GetNDSpace());
+    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
+    return M;
+  }
+}
+
 namespace
 {
 
@@ -1150,9 +1209,18 @@ template std::unique_ptr<ComplexOperator>
 SpaceOperator::GetExtraSystemMatrix(double, Operator::DiagonalPolicy);
 
 template std::unique_ptr<Operator>
-    SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy);
+    SpaceOperator::GetFloquetMatrix(Operator::DiagonalPolicy);
+template std::unique_ptr<ComplexOperator>
+    SpaceOperator::GetFloquetMatrix(Operator::DiagonalPolicy);
+
+template std::unique_ptr<Operator>
+    SpaceOperator::GetFloquetCorrectionCrossMatrix();
+template std::unique_ptr<ComplexOperator>
+    SpaceOperator::GetFloquetCorrectionCrossMatrix();
+template std::unique_ptr<Operator>
+    SpaceOperator::GetFloquetCorrectionMassMatrix();
 template std::unique_ptr<ComplexOperator>
-    SpaceOperator::GetPeriodicMatrix(Operator::DiagonalPolicy);
+    SpaceOperator::GetFloquetCorrectionMassMatrix();
 
 template std::unique_ptr<Operator>
 SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, const Operator *,
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 98cbcb10f..b34d81e8d 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -143,7 +143,7 @@ class SpaceOperator
   auto GlobalTrueVSize() const { return GetNDSpace().GlobalTrueVSize(); }
 
   // Construct any part of the frequency-dependent complex linear system matrix:
-  //                     A = K + iω C - ω² (Mr + i Mi) + A2(ω) + P.
+  //                     A = K + iω C - ω² (Mr + i Mi) + A2(ω) + FP.
   // For time domain problems, any one of K, C, or M = Mr can be constructed. The argument
   // ω is required only for the constructing the "extra" matrix A2(ω).
   template <typename OperType>
@@ -156,11 +156,15 @@ class SpaceOperator
   std::unique_ptr<OperType> GetExtraSystemMatrix(double omega,
                                                  Operator::DiagonalPolicy diag_policy);
   template <typename OperType>
-  std::unique_ptr<OperType> GetPeriodicMatrix(Operator::DiagonalPolicy diag_policy);
+  std::unique_ptr<OperType> GetFloquetMatrix(Operator::DiagonalPolicy diag_policy);
+  template <typename OperType>
+  std::unique_ptr<OperType> GetFloquetCorrectionCrossMatrix();
+  template <typename OperType>
+  std::unique_ptr<OperType> GetFloquetCorrectionMassMatrix();
 
   // Construct the complete frequency or time domain system matrix using the provided
   // stiffness, damping, mass, and extra matrices:
-  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 + P.
+  //                     A = a0 K + a1 C + a2 (Mr + i Mi) + A2 + FP.
   // It is assumed that the inputs have been constructed using previous calls to
   // GetSystemMatrix() and the returned operator does not inherit ownership of any of them.
   template <typename OperType, typename ScalarType>
@@ -182,7 +186,7 @@ class SpaceOperator
   // Construct the matrix for frequency or time domain linear system preconditioning. If it
   // is real-valued (Mr > 0, Mi < 0, |Mr + Mi| is done on the material property coefficient,
   // not the matrix entries themselves):
-  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) + P.
+  //             B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) + FP.
   template <typename OperType>
   std::unique_ptr<OperType> GetPreconditionerMatrix(double a0, double a1, double a2,
                                                     double a3);

From 00e4b8c23c0b28fec98e84ff1ae88104f3836e1f Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Tue, 10 Dec 2024 12:13:06 -0800
Subject: [PATCH 33/49] Clean up periodic transform detection

---
 palace/utils/geodata.cpp | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index 5f1360a21..509b1f6f0 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1792,7 +1792,7 @@ mfem::Vector ComputeNormal(std::unique_ptr<mfem::Mesh> &mesh,
       loc_normal *= -1.0;
     }
 
-    // Check if the boundary is planar by comparing the current elem's
+    // Check if the boundary is planar by comparing the current element's
     // normal to the average normal (accumulated so far).
     if (count > 0 && check_planar)
     {
@@ -1847,7 +1847,6 @@ std::vector<mfem::Vector> FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
   // Centroid is always considered a unique point.
   unique_pts.push_back(centroid);
   mfem::Vector cross_product(sdim);
-  cross_product = 0.0;
   for (const auto &[dist, pts_set] : dist2points)
   {
     // Only consider unique non-zero distances.
@@ -1866,7 +1865,7 @@ std::vector<mfem::Vector> FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
         v2 = unique_pts[2];
         v2 -= unique_pts[0];
         v1.cross3D(v2, cross_product);
-        // If normal is ~0, points are collinear. Remove last point and continue loop.
+        // If cross product is ~0, points are collinear. Remove last point and continue loop.
         if (cross_product.Norml2() < tol)
         {
           unique_pts.pop_back();
@@ -1954,6 +1953,8 @@ void ComputeRotation(const mfem::Vector &normal1, const mfem::Vector &normal2,
   }
 }
 
+// Create the vertex mapping between sets of donor and receiver pts related
+// by an affine transformation matrix.
 std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
                                              const std::unordered_set<int> &donor_v,
                                              const std::unordered_set<int> &receiver_v,
@@ -2034,6 +2035,10 @@ std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
   return v2v;
 }
 
+// Determine the vertex mapping between donor and receiver boundary attributes.
+// Uses the translation vector or affine transformation matrix specified in the
+// configuration file. If not provided, attempts to automatically detect the
+// affine transformation between donor and receiver boundary vertices.
 std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
                                                 const struct palace::config::PeriodicData &data,
                                                 const double tol = 1e-8)
@@ -2048,7 +2053,6 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
 
   // Identify donor and receiver vertices and elements.
   const auto &da = data.donor_attributes, &ra = data.receiver_attributes;
-  mfem::Vector coord(sdim);
   std::unordered_set<int> bdr_v_donor, bdr_v_receiver;
   std::unordered_set<int> bdr_e_donor, bdr_e_receiver;
   bool has_tets = false;
@@ -2069,7 +2073,7 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
       {
         bdr_e_donor.insert(be);
       }
-      if (receiver)
+      else if (receiver)
       {
         bdr_e_receiver.insert(be);
       }
@@ -2077,7 +2081,6 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
       mesh->GetBdrElementVertices(be, vertidxs);
       for (int i = 0; i < vertidxs.Size(); i++)
       {
-        coord = mesh->GetVertex(vertidxs[i]);
         if (donor)
         {
           bdr_v_donor.insert(vertidxs[i]);
@@ -2090,14 +2093,10 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
     }
   }
 
-  MFEM_VERIFY(
-    bdr_v_donor.size() == bdr_v_receiver.size(),
+  MFEM_VERIFY(bdr_v_donor.size() == bdr_v_receiver.size(),
     "Different number of "
     "vertices on donor and receiver boundaries. Cannot create periodic mesh.");
 
-  const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
-  Mpi::Print("Total number of elements: {:d}\n", mesh->GetNE());
-  Mpi::Print("Number of periodic BC elements: {:d}\n", num_periodic_bc_elems);
   // How to check if the mesh is OK?
   // Count number of elems in the periodic direction?
   // If hex/prism: Count boundary elements on donor+receiver,
@@ -2106,6 +2105,9 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
   // Mixed mesh is trickier
   // MOVE THIS TEST SOMEWHERE ELSE. IT SHOULD ALSO APPLY TO MESHES
   // ALREADY CREATED WITH PERIODICITY!!!
+  const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
+  Mpi::Print("Total number of elements: {:d}\n", mesh->GetNE());
+  Mpi::Print("Number of periodic BC elements: {:d}\n", num_periodic_bc_elems);
   mfem::Array<mfem::Geometry::Type> geoms;
   mesh->GetGeometries(3, geoms);
   if (geoms.Size() == 1 && geoms[0] == mfem::Geometry::TETRAHEDRON)
@@ -2125,7 +2127,7 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
     // No tets
      MFEM_VERIFY(mesh->GetNE() > num_periodic_bc_elems,
                  "Not enough mesh elements in periodic direction!");
-    }
+  }
 
   // Determine the affine transformation between donor and receiver points.
   // Use the translation vector or affine transformation matrix if provided
@@ -2176,7 +2178,7 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
     donor_pts = FindUniquePoints(mesh, bdr_v_donor, donor_centroid, donor_normal, mesh_dim, mesh_tol);
     receiver_pts = FindUniquePoints(mesh, bdr_v_receiver, receiver_centroid, receiver_normal, mesh_dim, mesh_tol);
     MFEM_VERIFY(donor_pts.size() == receiver_pts.size(),
-                "Different number of unique points on donor and receiver boundaries.");
+                "Different number of unique points on donor and receiver periodic boundaries.");
 
     // With 4 pairs of matching points, compute the unique affine transformation.
     // With < 4, cannot determine a unique transformation. We assume there is no

From cc6aeca6ff3effb3ee0f8ce0816806ae62c0aef8 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:13:07 -0800
Subject: [PATCH 34/49] Undo Floquet divfree test

---
 palace/linalg/divfree.cpp | 4 +---
 palace/linalg/divfree.hpp | 3 +--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index 9cc765e1a..3468c03bc 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -13,7 +13,6 @@
 #include "linalg/iterative.hpp"
 #include "linalg/rap.hpp"
 #include "models/materialoperator.hpp"
-#include "models/periodicboundaryoperator.hpp"
 #include "utils/timer.hpp"
 
 namespace palace
@@ -44,7 +43,7 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&a,
 
 template <typename VecType>
 DivFreeSolver<VecType>::DivFreeSolver(
-    const MaterialOperator &mat_op, PeriodicBoundaryOperator &periodic_op, FiniteElementSpace &nd_fespace,
+    const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
     FiniteElementSpaceHierarchy &h1_fespaces,
     const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists, double tol, int max_it,
     int print)
@@ -86,7 +85,6 @@ DivFreeSolver<VecType>::DivFreeSolver(
   // Create the mass and weak divergence operators for divergence-free projection.
   MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
-  //periodic_op.AddRealMassCoefficients(-1.0/(0.3144*0.3144), epsilon_func);
   {
     constexpr bool skip_zeros = false;
     BilinearForm m(h1_fespaces.GetFinestFESpace());
diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp
index 2bfc69ee5..b538553db 100644
--- a/palace/linalg/divfree.hpp
+++ b/palace/linalg/divfree.hpp
@@ -24,7 +24,6 @@ namespace palace
 class FiniteElementSpaceHierarchy;
 class FiniteElementSpace;
 class MaterialOperator;
-class PeriodicBoundaryOperator;
 
 //
 // This solver implements a projection onto a divergence-free space satisfying Gᵀ M x = 0,
@@ -56,7 +55,7 @@ class DivFreeSolver
   mutable VecType psi, rhs;
 
 public:
-  DivFreeSolver(const MaterialOperator &mat_op, PeriodicBoundaryOperator &periodic_op, FiniteElementSpace &nd_fespace,
+  DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
                 FiniteElementSpaceHierarchy &h1_fespaces,
                 const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists, double tol,
                 int max_it, int print);

From 0a862ec49bf1057270d9f5ee15858ec144194826 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:16:31 -0800
Subject: [PATCH 35/49] Use RT-space B field correction

---
 palace/drivers/drivensolver.cpp     | 17 +++++-
 palace/drivers/eigensolver.cpp      | 57 ++++++------------
 palace/linalg/CMakeLists.txt        |  1 +
 palace/linalg/floquetcorrection.cpp | 92 +++++++++++++++++++++++++++++
 palace/linalg/floquetcorrection.hpp | 68 +++++++++++++++++++++
 palace/models/postoperator.cpp      | 67 ++-------------------
 palace/models/postoperator.hpp      |  6 +-
 palace/models/spaceoperator.cpp     | 68 ---------------------
 palace/models/spaceoperator.hpp     |  4 --
 9 files changed, 198 insertions(+), 182 deletions(-)
 create mode 100644 palace/linalg/floquetcorrection.cpp
 create mode 100644 palace/linalg/floquetcorrection.hpp

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index 2c737362e..0150823f8 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -8,6 +8,7 @@
 #include "fem/errorindicator.hpp"
 #include "fem/mesh.hpp"
 #include "linalg/errorestimator.hpp"
+#include "linalg/floquetcorrection.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
 #include "linalg/vector.hpp"
@@ -117,7 +118,7 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   auto C = space_op.GetDampingMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto M = space_op.GetMassMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   auto A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega0, Operator::DIAG_ZERO);
-  auto PF = space_op.GetFloquetMatrix<ComplexOperator>(Operator::DIAG_ZERO);
+  auto FP = space_op.GetFloquetMatrix<ComplexOperator>(Operator::DIAG_ZERO);
   const auto &Curl = space_op.GetCurlMatrix();
 
   // Set up the linear solver and set operators for the first frequency step. The
@@ -125,7 +126,7 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
   // to the complex system matrix.
   auto A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega0,
                                     std::complex<double>(-omega0 * omega0, 0.0), K.get(),
-                                    C.get(), M.get(), A2.get(), PF.get());
+                                    C.get(), M.get(), A2.get(), FP.get());
   auto P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega0, -omega0 * omega0,
                                                              omega0);
   ComplexKspSolver ksp(iodata, space_op.GetNDSpaces(), &space_op.GetH1Spaces());
@@ -164,7 +165,7 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
       A2 = space_op.GetExtraSystemMatrix<ComplexOperator>(omega, Operator::DIAG_ZERO);
       A = space_op.GetSystemMatrix(std::complex<double>(1.0, 0.0), 1i * omega,
                                    std::complex<double>(-omega * omega, 0.0), K.get(),
-                                   C.get(), M.get(), A2.get(), PF.get());
+                                   C.get(), M.get(), A2.get(), FP.get());
       P = space_op.GetPreconditionerMatrix<ComplexOperator>(1.0, omega, -omega * omega,
                                                             omega);
       ksp.SetOperators(*A, *P);
@@ -179,6 +180,16 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
     Curl.Mult(E.Real(), B.Real());
     Curl.Mult(E.Imag(), B.Imag());
     B *= -1.0 / (1i * omega);
+    if (FP)
+    {
+      // Calculate B field correction for Floquet BCs.
+      // B = -1/(iω) ∇ x E - 1/ω kp x E
+      std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
+      floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
+        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
+        space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
+      floquet_corr->AddMult(E, B, -1.0 / omega);
+    }
     post_op.SetEGridFunction(E);
     post_op.SetBGridFunction(B);
     post_op.UpdatePorts(space_op.GetLumpedPortOp(), space_op.GetWavePortOp(), omega);
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index f72088d48..cf3f27fdb 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -9,11 +9,12 @@
 #include "linalg/arpack.hpp"
 #include "linalg/divfree.hpp"
 #include "linalg/errorestimator.hpp"
+#include "linalg/floquetcorrection.hpp"
+//#include "linalg/jacobi.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
 #include "linalg/slepc.hpp"
 #include "linalg/vector.hpp"
-#include "linalg/jacobi.hpp"
 #include "models/lumpedportoperator.hpp"
 #include "models/postoperator.hpp"
 #include "models/spaceoperator.hpp"
@@ -173,23 +174,15 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   // Construct a divergence-free projector so the eigenvalue solve is performed in the space
   // orthogonal to the zero eigenvalues of the stiffness matrix.
   std::unique_ptr<DivFreeSolver<ComplexVector>> divfree;
-  if (iodata.solver.linear.divfree_max_it > 0)
+  if (iodata.solver.linear.divfree_max_it > 0 && !FP)
   {
-    if (FP) //BYPASS?!?!?! OR FIND WAY TO MAKE IT WORK?
-    {
-      Mpi::Warning("Divergence-free projection is not compatible with non-zero "
-                   "Floquet wave vector!\n");
-    }
-    else
-    {
-      Mpi::Print(" Configuring divergence-free projection\n");
-      constexpr int divfree_verbose = 0;
-      divfree = std::make_unique<DivFreeSolver<ComplexVector>>(
-        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(), space_op.GetH1Spaces(),
+    Mpi::Print(" Configuring divergence-free projection\n");
+    constexpr int divfree_verbose = 0;
+    divfree = std::make_unique<DivFreeSolver<ComplexVector>>(
+        space_op.GetMaterialOp(), space_op.GetNDSpace(), space_op.GetH1Spaces(),
         space_op.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol,
         iodata.solver.linear.divfree_max_it, divfree_verbose);
-      eigen->SetDivFreeProjector(*divfree);
-    }
+    eigen->SetDivFreeProjector(*divfree);
   }
 
   // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is
@@ -332,34 +325,18 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     eigen->GetEigenvector(i, E);
     Curl.Mult(E.Real(), B.Real());
     Curl.Mult(E.Imag(), B.Imag());
-    /**/
-    //  kp x E / omega, just a test, clean up later
+
+    B *= -1.0 / (1i * omega);
     if (FP)
     {
-      auto FM = space_op.GetFloquetCorrectionMassMatrix<ComplexOperator>();
-      auto FC = space_op.GetFloquetCorrectionCrossMatrix<ComplexOperator>();
-      ComplexVector BF(Curl.Width()), RHS(Curl.Width());
-      BF.UseDevice(true);
-      RHS.UseDevice(true);
-
-      auto pcg = std::make_unique<CgSolver<ComplexOperator>>(space_op.GetComm(), 0);
-      pcg->SetInitialGuess(0);
-      pcg->SetRelTol(iodata.solver.linear.tol);
-      pcg->SetAbsTol(std::numeric_limits<double>::epsilon());
-      pcg->SetMaxIter(iodata.solver.linear.max_it);
-      auto jac = std::make_unique<JacobiSmoother<ComplexOperator>>(space_op.GetComm());
-      auto kspM = std::make_unique<ComplexKspSolver>(std::move(pcg), std::move(jac));
-      kspM->SetOperators(*FM, *FM);
-
-      // Floquet correction = kp x E / omega = FC * E / omega
-      FC->Mult(E, RHS);
-      kspM->Mult(RHS, BF);
-
-      BF *= -1.0 / omega;
-      post_op.SetBFGridFunction(BF);
+      // Calculate B field correction for Floquet BCs.
+      // B = -1/(iω) ∇ x E - 1/ω kp x E.
+      std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
+      floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
+        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
+        space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
+      floquet_corr->AddMult(E, B, -1.0 / omega);
     }
-    /**/
-    B *= -1.0 / (1i * omega);
     post_op.SetEGridFunction(E);
     post_op.SetBGridFunction(B);
     post_op.UpdatePorts(space_op.GetLumpedPortOp(), omega.real());
diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt
index 86eae9f82..c94301c8f 100644
--- a/palace/linalg/CMakeLists.txt
+++ b/palace/linalg/CMakeLists.txt
@@ -15,6 +15,7 @@ target_sources(${LIB_TARGET_NAME}
   ${CMAKE_CURRENT_SOURCE_DIR}/distrelaxation.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/errorestimator.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/floquetcorrection.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/gmg.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/hcurl.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/hypre.cpp
diff --git a/palace/linalg/floquetcorrection.cpp b/palace/linalg/floquetcorrection.cpp
new file mode 100644
index 000000000..3964039ad
--- /dev/null
+++ b/palace/linalg/floquetcorrection.cpp
@@ -0,0 +1,105 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "floquetcorrection.hpp"
+
+#include <limits>
+#include <type_traits>
+#include <mfem.hpp>
+#include "fem/bilinearform.hpp"
+#include "fem/fespace.hpp"
+#include "fem/integrator.hpp"
+#include "linalg/iterative.hpp"
+#include "linalg/jacobi.hpp"
+#include "linalg/rap.hpp"
+#include "models/materialoperator.hpp"
+#include "models/periodicboundaryoperator.hpp"
+
+namespace palace
+{
+
+// Assembles the mass operator on rt_fespace and the cross product operator mapping
+// nd_fespace to rt_fespace, then sets up a CgSolver/JacobiSmoother pair acting on the
+// mass operator. tol, max_it and print are forwarded to the CgSolver.
+template <typename VecType>
+FloquetCorrSolver<VecType>::FloquetCorrSolver(
+    const MaterialOperator &mat_op,
+    PeriodicBoundaryOperator &periodic_op,
+    FiniteElementSpace &nd_fespace, FiniteElementSpace &rt_fespace,
+    double tol, int max_it, int print)
+{
+  // Create the mass and cross product operators for Floquet correction.
+  {
+    constexpr bool skip_zeros = false;
+    BilinearForm a(rt_fespace);
+    a.AddDomainIntegrator<VectorFEMassIntegrator>();
+    std::unique_ptr<Operator> m = a.Assemble(skip_zeros);
+    if constexpr (std::is_same<OperType, ComplexOperator>::value)
+    {
+      M = std::make_unique<ComplexParOperator>(std::move(m), nullptr, rt_fespace);
+    }
+    else
+    {
+      M = std::make_unique<ParOperator>(std::move(m), rt_fespace);
+    }
+  }
+
+  {
+    // The coefficient of the mixed form is supplied by the periodic boundary operator.
+    MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
+    periodic_op.AddFloquetCrossCoefficients(1.0, f);
+    constexpr bool skip_zeros = false;
+    BilinearForm a(nd_fespace, rt_fespace);
+    a.AddDomainIntegrator<VectorFEMassIntegrator>(f);
+    std::unique_ptr<Operator> m = a.Assemble(skip_zeros);
+    if constexpr (std::is_same<OperType, ComplexOperator>::value)
+    {
+      Cross = std::make_unique<ComplexParOperator>(std::move(m), nullptr, nd_fespace, rt_fespace, false);
+    }
+    else
+    {
+      Cross = std::make_unique<ParOperator>(std::move(m), nd_fespace, rt_fespace, false);
+    }
+  }
+
+  // Setup the linear solver.
+  auto pcg = std::make_unique<CgSolver<OperType>>(rt_fespace.GetComm(), print);
+  pcg->SetInitialGuess(0);
+  pcg->SetRelTol(tol);
+  pcg->SetAbsTol(std::numeric_limits<double>::epsilon());
+  pcg->SetMaxIter(max_it);
+  auto jac = std::make_unique<JacobiSmoother<OperType>>(rt_fespace.GetComm());
+  ksp = std::make_unique<BaseKspSolver<OperType>>(std::move(pcg), std::move(jac));
+  ksp->SetOperators(*M, *M);
+
+  rhs.SetSize(rt_fespace.GetTrueVSize());
+  rhs.UseDevice(true);
+}
+
+// Applies the cross product operator to x, then solves M y = rhs for y.
+template <typename VecType>
+void FloquetCorrSolver<VecType>::Mult(const VecType &x, VecType &y) const
+{
+  // rhs holds the right-hand side of the solve; y receives the solution.
+  Cross->Mult(x, rhs);
+  ksp->Mult(rhs, y);
+}
+
+// Accumulates the result of Mult(), scaled by a, into y.
+template <typename VecType>
+void FloquetCorrSolver<VecType>::AddMult(const VecType &x, VecType &y, ScalarType a) const
+{
+  // Mult() stages the right-hand side of its solve in rhs, so the solution needs its own
+  // buffer: handing rhs to Mult() as the output would alias the solver input and output.
+  VecType corr;
+  corr.SetSize(rhs.Size());
+  corr.UseDevice(true);
+  this->Mult(x, corr);
+  corr *= a;
+  y += corr;
+}
+
+template class FloquetCorrSolver<Vector>;
+template class FloquetCorrSolver<ComplexVector>;
+
+}  // namespace palace
diff --git a/palace/linalg/floquetcorrection.hpp b/palace/linalg/floquetcorrection.hpp
new file mode 100644
index 000000000..fde11db15
--- /dev/null
+++ b/palace/linalg/floquetcorrection.hpp
@@ -0,0 +1,74 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LINALG_FLOQUET_CORR_HPP
+#define PALACE_LINALG_FLOQUET_CORR_HPP
+
+#include <complex>
+#include <memory>
+#include <type_traits>
+#include <vector>
+#include "linalg/ksp.hpp"
+#include "linalg/operator.hpp"
+#include "linalg/vector.hpp"
+
+namespace mfem
+{
+
+template <typename T>
+class Array;
+
+}  // namespace mfem
+
+namespace palace
+{
+
+class FiniteElementSpace;
+class MaterialOperator;
+class PeriodicBoundaryOperator;
+
+//
+// This solver calculates a correction for the magnetic flux density field
+// when Floquet periodicity is imposed. The correction is the cross product
+// of the Floquet wave vector with the electric field.
+//
+template <typename VecType>
+class FloquetCorrSolver
+{
+  // Operator and scalar types matching VecType: ComplexOperator and std::complex<double>
+  // for ComplexVector, Operator and double otherwise.
+  using OperType = typename std::conditional<std::is_same<VecType, ComplexVector>::value,
+                                             ComplexOperator, Operator>::type;
+  using ScalarType =
+      typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
+                                std::complex<double>, double>::type;
+
+private:
+  // Operators for the Floquet correction.
+  std::unique_ptr<OperType> M, Cross;
+
+  // Linear solver for the linear system M y = x.
+  std::unique_ptr<BaseKspSolver<OperType>> ksp;
+
+  // Workspace objects for solver application.
+  mutable VecType rhs;
+
+public:
+  FloquetCorrSolver(const MaterialOperator &mat_op,
+                    PeriodicBoundaryOperator &periodic_op,
+                    FiniteElementSpace &nd_fespace,
+                    FiniteElementSpace &rt_fespace,
+                    double tol, int max_it, int print);
+
+  // Given a vector of Nedelec dofs for an arbitrary vector field, compute
+  // the Raviart-Thomas space field y = [kp x] x, where [kp x] is a matrix
+  // representing the action of the cross product with the Floquet wave vector.
+  void Mult(const VecType &x, VecType &y) const;
+
+  // Same field as Mult(), scaled by a and added to y instead of overwriting it.
+  void AddMult(const VecType &x, VecType &y, ScalarType a = 1.0) const;
+};
+
+}  // namespace palace
+
+#endif  // PALACE_LINALG_FLOQUET_CORR_HPP
diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp
index c692f9ba4..2a584b6ba 100644
--- a/palace/models/postoperator.cpp
+++ b/palace/models/postoperator.cpp
@@ -48,9 +48,6 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &space_op,
     B(std::make_unique<GridFunction>(space_op.GetRTSpace(),
                                      iodata.problem.type !=
                                          config::ProblemData::Type::TRANSIENT)),
-    BF(std::make_unique<GridFunction>(space_op.GetNDSpace(),
-                                      iodata.problem.type !=
-                                         config::ProblemData::Type::TRANSIENT)),
     lumped_port_init(false), wave_port_init(false),
     paraview(CreateParaviewPath(iodata, name), &space_op.GetNDSpace().GetParMesh()),
     paraview_bdr(CreateParaviewPath(iodata, name) + "_boundary",
@@ -63,7 +60,6 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &space_op,
 
   E_sr = std::make_unique<BdrFieldVectorCoefficient>(E->Real());
   B_sr = std::make_unique<BdrFieldVectorCoefficient>(B->Real());
-  BF_sr = std::make_unique<BdrFieldVectorCoefficient>(BF->Real());
   J_sr = std::make_unique<BdrSurfaceCurrentVectorCoefficient>(B->Real(), mat_op);
   Q_sr = std::make_unique<BdrSurfaceFluxCoefficient<SurfaceFluxType::ELECTRIC>>(
       &E->Real(), nullptr, mat_op, true, mfem::Vector());
@@ -71,7 +67,6 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &space_op,
   {
     E_si = std::make_unique<BdrFieldVectorCoefficient>(E->Imag());
     B_si = std::make_unique<BdrFieldVectorCoefficient>(B->Imag());
-    BF_si = std::make_unique<BdrFieldVectorCoefficient>(BF->Imag());
     J_si = std::make_unique<BdrSurfaceCurrentVectorCoefficient>(B->Imag(), mat_op);
     Q_si = std::make_unique<BdrSurfaceFluxCoefficient<SurfaceFluxType::ELECTRIC>>(
         &E->Imag(), nullptr, mat_op, true, mfem::Vector());
@@ -206,21 +201,6 @@ void PostOperator::InitializeDataCollection(const IoData &iodata)
       paraview_bdr.RegisterVCoeffField("B", B_sr.get());
     }
   }
-  if (BF)
-  {
-    if (HasImag())
-    {
-      paraview.RegisterField("BF_real", &BF->Real());
-      paraview.RegisterField("BF_imag", &BF->Imag());
-      paraview_bdr.RegisterVCoeffField("BF_real", BF_sr.get());
-      paraview_bdr.RegisterVCoeffField("BF_imag", BF_si.get());
-    }
-    else
-    {
-      paraview.RegisterField("BF", &BF->Real());
-      paraview_bdr.RegisterVCoeffField("BF", BF_sr.get());
-    }
-  }
   if (V)
   {
     paraview.RegisterField("V", &V->Real());
@@ -301,21 +281,6 @@ void PostOperator::SetEGridFunction(const ComplexVector &e, bool exchange_face_n
   lumped_port_init = wave_port_init = false;
 }
 
-void PostOperator::SetBFGridFunction(const ComplexVector &bf, bool exchange_face_nbr_data)
-{
-  MFEM_VERIFY(HasImag(),
-              "SetBFGridFunction for complex-valued output called when HasImag() == false!");
-  MFEM_VERIFY(BF, "Incorrect usage of PostOperator::SetBFGridFunction!");
-  BF->Real().SetFromTrueDofs(bf.Real());  // Parallel distribute
-  BF->Imag().SetFromTrueDofs(bf.Imag());
-  if (exchange_face_nbr_data)
-  {
-    BF->Real().ExchangeFaceNbrData();  // Ready for parallel comm on shared faces
-    BF->Imag().ExchangeFaceNbrData();
-  }
-  has_floquet = true;
-}
-
 void PostOperator::SetBGridFunction(const ComplexVector &b, bool exchange_face_nbr_data)
 {
   MFEM_VERIFY(HasImag(),
@@ -704,7 +669,7 @@ namespace
 {
 
 template <typename T>
-void ScaleGridFunctions(double L, int dim, bool imag, T &E, T &B, T &V, T &A, T &BF)
+void ScaleGridFunctions(double L, int dim, bool imag, T &E, T &B, T &V, T &A)
 {
   // For fields on H(curl) and H(div) spaces, we "undo" the effect of redimensionalizing
   // the mesh which would carry into the fields during the mapping from reference to
@@ -737,17 +702,6 @@ void ScaleGridFunctions(double L, int dim, bool imag, T &E, T &B, T &V, T &A, T
       B->Imag().FaceNbrData() *= Ld;
     }
   }
-  if (BF)
-  {
-    // Piola transform: J^-T
-    BF->Real() *= L;
-    BF->Real().FaceNbrData() *= L;
-    if (imag)
-    {
-      BF->Imag() *= L;
-      BF->Imag().FaceNbrData() *= L;
-    }
-  }
   if (A)
   {
     // Piola transform: J^-T
@@ -765,7 +719,7 @@ void PostOperator::WriteFields(int step, double time) const
   mfem::ParMesh &mesh =
       HasE() ? *E->ParFESpace()->GetParMesh() : *B->ParFESpace()->GetParMesh();
   mesh::DimensionalizeMesh(mesh, mesh_Lc0);
-  ScaleGridFunctions(mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A, BF);
+  ScaleGridFunctions(mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A);
   paraview.SetCycle(step);
   paraview.SetTime(time);
   paraview_bdr.SetCycle(step);
@@ -773,7 +727,7 @@ void PostOperator::WriteFields(int step, double time) const
   paraview.Save();
   paraview_bdr.Save();
   mesh::NondimensionalizeMesh(mesh, mesh_Lc0);
-  ScaleGridFunctions(1.0 / mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A, BF);
+  ScaleGridFunctions(1.0 / mesh_Lc0, mesh.Dimension(), HasImag(), E, B, V, A);
 }
 
 void PostOperator::WriteFieldsFinal(const ErrorIndicator *indicator) const
@@ -860,20 +814,7 @@ std::vector<std::complex<double>> PostOperator::ProbeEField() const
 std::vector<std::complex<double>> PostOperator::ProbeBField() const
 {
   MFEM_VERIFY(B, "PostOperator is not configured for magnetic flux density probes!");
-  if (has_floquet)
-  {
-    auto probe_B = interp_op.ProbeField(*B);
-    auto probe_BF = interp_op.ProbeField(*BF);
-    for (int i = 0; i < probe_B.size(); i++)
-    {
-      probe_B[i] += probe_BF[i];
-    }
-    return probe_B;
-  }
-  else
-  {
-    return interp_op.ProbeField(*B);
-  }
+  return interp_op.ProbeField(*B);
 }
 
 }  // namespace palace
diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp
index b0c09717e..2522d815e 100644
--- a/palace/models/postoperator.hpp
+++ b/palace/models/postoperator.hpp
@@ -44,10 +44,9 @@ class PostOperator
   const DomainPostOperator dom_post_op;
 
   // Objects for grid function postprocessing from the FE solution.
-  mutable std::unique_ptr<GridFunction> E, B, V, A, BF;
-  std::unique_ptr<mfem::VectorCoefficient> S, E_sr, E_si, B_sr, B_si, A_s, J_sr, J_si, BF_sr, BF_si;
+  mutable std::unique_ptr<GridFunction> E, B, V, A;
+  std::unique_ptr<mfem::VectorCoefficient> S, E_sr, E_si, B_sr, B_si, A_s, J_sr, J_si;
   std::unique_ptr<mfem::Coefficient> U_e, U_m, V_s, Q_sr, Q_si;
-  bool has_floquet = false;
 
   // Wave port boundary mode field postprocessing.
   struct WavePortFieldData
@@ -92,7 +91,6 @@ class PostOperator
   // nondimensionalized consistently (B ~ E (L₀ ω₀ E₀⁻¹)).
   void SetEGridFunction(const ComplexVector &e, bool exchange_face_nbr_data = true);
   void SetBGridFunction(const ComplexVector &b, bool exchange_face_nbr_data = true);
-  void SetBFGridFunction(const ComplexVector &b, bool exchange_face_nbr_data = true);
   void SetEGridFunction(const Vector &e, bool exchange_face_nbr_data = true);
   void SetBGridFunction(const Vector &b, bool exchange_face_nbr_data = true);
   void SetVGridFunction(const Vector &v, bool exchange_face_nbr_data = true);
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 04da291a3..c800193b1 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -513,65 +513,6 @@ SpaceOperator::GetFloquetMatrix(Operator::DiagonalPolicy diag_policy)
   }
 }
 
-template <typename OperType>
-std::unique_ptr<OperType>
-SpaceOperator::GetFloquetCorrectionCrossMatrix()
-{
-  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute());
-  periodic_op.AddFloquetCrossCoefficients(1.0, f);
-  int empty = (f.empty());
-  Mpi::GlobalMin(1, &empty, GetComm());
-  if (empty)
-  {
-    return {};
-  }
-  constexpr bool skip_zeros = false;
-  std::unique_ptr<Operator> m;
-  if (!empty)
-  {
-    m = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, nullptr, nullptr,
-                          nullptr, skip_zeros);
-  }
-  if constexpr (std::is_same<OperType, ComplexOperator>::value)
-  {
-    auto M =
-        std::make_unique<ComplexParOperator>(std::move(m), nullptr, GetNDSpace());
-    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return M;
-  }
-  else
-  {
-    auto M = std::make_unique<ParOperator>(std::move(m), GetNDSpace());
-    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return M;
-  }
-}
-
-template <typename OperType>
-std::unique_ptr<OperType>
-SpaceOperator::GetFloquetCorrectionMassMatrix()
-{
-  PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  constexpr bool skip_zeros = false;
-  BilinearForm a(GetNDSpace());
-  a.AddDomainIntegrator<VectorFEMassIntegrator>();
-  std::unique_ptr<Operator> m = a.Assemble(skip_zeros);
-  if constexpr (std::is_same<OperType, ComplexOperator>::value)
-  {
-    auto M =
-        std::make_unique<ComplexParOperator>(std::move(m), nullptr, GetNDSpace());
-    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return M;
-  }
-  else
-  {
-    auto M = std::make_unique<ParOperator>(std::move(m), GetNDSpace());
-    //M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy);
-    return M;
-  }
-}
-
 namespace
 {
 
@@ -1213,15 +1154,6 @@ template std::unique_ptr<Operator>
 template std::unique_ptr<ComplexOperator>
     SpaceOperator::GetFloquetMatrix(Operator::DiagonalPolicy);
 
-template std::unique_ptr<Operator>
-    SpaceOperator::GetFloquetCorrectionCrossMatrix();
-template std::unique_ptr<ComplexOperator>
-    SpaceOperator::GetFloquetCorrectionCrossMatrix();
-template std::unique_ptr<Operator>
-    SpaceOperator::GetFloquetCorrectionMassMatrix();
-template std::unique_ptr<ComplexOperator>
-    SpaceOperator::GetFloquetCorrectionMassMatrix();
-
 template std::unique_ptr<Operator>
 SpaceOperator::GetSystemMatrix<Operator, double>(double, double, double, const Operator *,
                                                  const Operator *, const Operator *,
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index b34d81e8d..ca0ca8c0e 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -157,10 +157,6 @@ class SpaceOperator
                                                  Operator::DiagonalPolicy diag_policy);
   template <typename OperType>
   std::unique_ptr<OperType> GetFloquetMatrix(Operator::DiagonalPolicy diag_policy);
-  template <typename OperType>
-  std::unique_ptr<OperType> GetFloquetCorrectionCrossMatrix();
-  template <typename OperType>
-  std::unique_ptr<OperType> GetFloquetCorrectionMassMatrix();
 
   // Construct the complete frequency or time domain system matrix using the provided
   // stiffness, damping, mass, and extra matrices:

From f2288a5a2a8bbb30ec5def5382ad8a31d555ef94 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:19:04 -0800
Subject: [PATCH 36/49] Remove Aux space preconditioner tests

---
 palace/models/periodicboundaryoperator.cpp | 36 ----------------------
 palace/models/periodicboundaryoperator.hpp |  7 ++---
 palace/models/spaceoperator.cpp            |  1 -
 3 files changed, 3 insertions(+), 41 deletions(-)

diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index 03e87389a..a34b834ee 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -105,19 +105,6 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   wave_vector_cross(1, 2) = -wave_vector[0];
   wave_vector_cross(2, 0) = -wave_vector[1];
   wave_vector_cross(2, 1) = wave_vector[0];
-
-  // Test for preconditioning
-  Mpi::Print("wave vector after clipping:\n");
-  wave_vector.Print();
-  const double wave_vector_norm2 = pow(wave_vector.Norml2(), 2);
-  Mpi::Print("wave vector norml2: {:.3e}\n", wave_vector_norm2);
-  wave_vector_diag.SetSize(3);
-  wave_vector_diag = 0.0;
-  wave_vector_diag(0, 0) = 1.0;//wave_vector_norm2;
-  wave_vector_diag(1, 1) = 1.0;//wave_vector_norm2;
-  wave_vector_diag(2, 2) = 1.0;//wave_vector_norm2;
-  //Mpi::Print("wave vector diag:\n");
-  //wave_vector_diag.Print();
 }
 
 mfem::Array<int>
@@ -253,28 +240,6 @@ void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
   }
 }
 
-// TEST - REMOVE LATER!!!
-void PeriodicBoundaryOperator::AddImagMassCoefficients(double coeff,
-                                                       MaterialPropertyCoefficient &f)
-{
-  if (non_zero_wave_vector)
-  {
-    // 1/mu [k x]
-    mfem::DenseTensor kx(mat_op.GetInvPermeability().SizeI(),
-                        mat_op.GetInvPermeability().SizeJ(),
-                        mat_op.GetInvPermeability().SizeK());
-    for (int k = 0; k < kx.SizeK(); k++)
-    {
-      kx(k) = wave_vector_diag;
-    }
-    mfem::DenseTensor muinvkx = linalg::Mult(mat_op.GetInvPermeability(), kx);
-    MaterialPropertyCoefficient muinvkx_func(mat_op.GetAttributeToMaterial(),
-                                             muinvkx);
-    f.AddCoefficient(muinvkx_func.GetAttributeToMaterial(),
-                     muinvkx_func.GetMaterialProperties(), coeff);
-  }
-}
-
 void PeriodicBoundaryOperator::AddFloquetCrossCoefficients(double coeff,
                                                        MaterialPropertyCoefficient &f)
 {
@@ -287,7 +252,6 @@ void PeriodicBoundaryOperator::AddFloquetCrossCoefficients(double coeff,
     for (int k = 0; k < kx.SizeK(); k++)
     {
       kx(k) = wave_vector_cross;
-      //kx(k) = wave_vector_diag;//test
     }
     MaterialPropertyCoefficient kx_func(mat_op.GetAttributeToMaterial(), kx);
     f.AddCoefficient(kx_func.GetAttributeToMaterial(),
diff --git a/palace/models/periodicboundaryoperator.hpp b/palace/models/periodicboundaryoperator.hpp
index a9ec508ba..b48dc16a6 100644
--- a/palace/models/periodicboundaryoperator.hpp
+++ b/palace/models/periodicboundaryoperator.hpp
@@ -29,7 +29,7 @@ class PeriodicBoundaryOperator
   mfem::Vector wave_vector;
 
   // Matrix representation of cross product with the wave vector.
-  mfem::DenseMatrix wave_vector_cross, wave_vector_diag; //test - remove wave_vector_diag later!
+  mfem::DenseMatrix wave_vector_cross;
 
   // Check if the wave vector is zero to bypass additional terms.
   bool non_zero_wave_vector;
@@ -43,12 +43,11 @@ class PeriodicBoundaryOperator
   // Returns array of periodic BC attributes.
   const auto &GetAttrList() const { return periodic_attr; }
 
-  // Add contributions to system matrices
+  // Add contributions to system matrices.
   void AddRealMassCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddWeakCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
   void AddCurlCoefficients(double coeff, MaterialPropertyCoefficient &f);
-  void AddImagMassCoefficients(double coeff, MaterialPropertyCoefficient &f); // test - remove later
-  void AddFloquetCrossCoefficients(double coeff, MaterialPropertyCoefficient &f); // test
+  void AddFloquetCrossCoefficients(double coeff, MaterialPropertyCoefficient &f);
 };
 
 }  // namespace palace
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index c800193b1..9717012ee 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -837,7 +837,6 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     {
       bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fpwi, &fpi,
                                  skip_zeros, assemble_q_data);
-      //periodic_op.AddImagMassCoefficients(7.0, fi);//test - helps in some cases
       bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fpwi, &fpi,
                                         &skip_zeros, assemble_q_data);
     }

From 9fbe941825a8d826807274de161531ec10af18be Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:21:15 -0800
Subject: [PATCH 37/49] Remove print

---
 palace/models/periodicboundaryoperator.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index a34b834ee..ff9b6a994 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -67,8 +67,6 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
                   iodata.problem.type == config::ProblemData::Type::EIGENMODE,
               "Quasi-periodic Floquet boundary conditions are only available for "
               " frequency domain driven or eigenmode simulations!");
-  Mpi::Print("wave_vector.Norml2(): {:.3e}\n",wave_vector.Norml2());
-  wave_vector.Print();
   MFEM_VERIFY(!non_zero_wave_vector || sdim == 3,
               "Quasi-periodic Floquet periodic boundary conditions are only available "
               " in 3D!");

From b865e9c42a3ff16bf03f42e98b4ccbf5f35ba0c7 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:24:37 -0800
Subject: [PATCH 38/49] Undo Aux space preconditioner tests

---
 palace/models/spaceoperator.cpp | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 9717012ee..f141e6418 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -256,8 +256,7 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
 
 void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
                        const MaterialPropertyCoefficient *fb,
-                       const MaterialPropertyCoefficient *fpw,
-                       const MaterialPropertyCoefficient *fp, bool assemble_q_data = false)
+                       bool assemble_q_data = false)
 {
   if (f && !f->empty())
   {
@@ -267,14 +266,6 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   {
     a.AddBoundaryIntegrator<DiffusionIntegrator>(*fb);
   }
-  if (fpw && !fpw->empty())
-  {
-    // a.AddDomainIntegrator<DiffusionIntegrator>(*fpw);
-  }
-  if (fp && !fp->empty())
-  {
-    // a.AddDomainIntegrator<DiffusionIntegrator>(*fp);
-  }
   if (assemble_q_data)
   {
     a.AssembleQuadratureData();
@@ -307,13 +298,11 @@ auto AssembleOperators(
 
 auto AssembleAuxOperators(const FiniteElementSpaceHierarchy &fespaces,
                           const MaterialPropertyCoefficient *f,
-                          const MaterialPropertyCoefficient *fb,
-                          const MaterialPropertyCoefficient *fpw,
-                          const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
+                          const MaterialPropertyCoefficient *fb, bool skip_zeros = false,
                           bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
-  AddAuxIntegrators(a, f, fb, fpw, fp, assemble_q_data);
+  AddAuxIntegrators(a, f, fb, assemble_q_data);
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
@@ -830,14 +819,14 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     {
       br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr,
                                  skip_zeros, assemble_q_data);
-      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fpwr, &fpr,
+      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr,
                                         skip_zeros, assemble_q_data);
     }
     if (!empty[1])
     {
       bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fpwi, &fpi,
                                  skip_zeros, assemble_q_data);
-      bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, &fpwi, &fpi,
-                                        &skip_zeros, assemble_q_data);
+      bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi,
+                                        skip_zeros, assemble_q_data);
     }
   }
@@ -862,7 +851,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     {
       br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr,  &fpwr, &fpr,
                                  skip_zeros, assemble_q_data);
-      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, &fpwr, &fpr,
+      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr,
                                         skip_zeros, assemble_q_data);
     }
   }

From 18eab19954f135dff38b1d6d5008da9a0867b430 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:24:48 -0800
Subject: [PATCH 39/49] Remove print

---
 palace/linalg/solver.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index 6f49e2f17..6c3818b92 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -55,7 +55,7 @@ void MfemWrapperSolver<ComplexOperator>::SetOperator(const ComplexOperator &op)
     //Mpi::Print("Using real coarse solve\n");
     //A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
     /**/
-    Mpi::Print("Using complex coarse solve\n");
+    //Mpi::Print("Using complex coarse solve\n");
     // A = [Ar, -Ai]
     //     [Ai,  Ar]
     mfem::Array2D<const mfem::HypreParMatrix *> blocks(2, 2);
@@ -137,7 +137,6 @@ void MfemWrapperSolver<ComplexOperator>::Mult(const ComplexVector &x,
   }
   else
   {
-    // Is there a better way than idx1, idx2 + SetSubVector?
     Vector X(2 * x.Size()), Y(2 * y.Size()), yr, yi;
     X.UseDevice(true);
     Y.UseDevice(true);

From a01573cf371b35219b3ab6b22a5ac81170a094f2 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:33:18 -0800
Subject: [PATCH 40/49] Remove unnecessary include

---
 palace/drivers/eigensolver.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index cf3f27fdb..b75930cc3 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -10,7 +10,6 @@
 #include "linalg/divfree.hpp"
 #include "linalg/errorestimator.hpp"
 #include "linalg/floquetcorrection.hpp"
-//#include "linalg/jacobi.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
 #include "linalg/slepc.hpp"
@@ -325,7 +324,6 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     eigen->GetEigenvector(i, E);
     Curl.Mult(E.Real(), B.Real());
     Curl.Mult(E.Imag(), B.Imag());
-
     B *= -1.0 / (1i * omega);
     if (FP)
     {

From 76e325b1ed456cb86ad3d4f904c5c73625e6b38f Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:45:02 -0800
Subject: [PATCH 41/49] Update regression test results

---
 .../ref/cylinder/floquet/domain-E.csv         | 30 +++++++++----------
 .../ref/cylinder/floquet/error-indicators.csv |  2 +-
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/test/examples/ref/cylinder/floquet/domain-E.csv b/test/examples/ref/cylinder/floquet/domain-E.csv
index e9599ad66..0d90d967f 100644
--- a/test/examples/ref/cylinder/floquet/domain-E.csv
+++ b/test/examples/ref/cylinder/floquet/domain-E.csv
@@ -1,16 +1,16 @@
                m,              E_elec (J),               E_mag (J),               E_cap (J),               E_ind (J),           E_elec[1] (J),               p_elec[1],            E_mag[1] (J),                p_mag[1]
- 1.000000000e+00,        +9.139656208e-02,        +8.396001243e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.396001243e-02,        +1.000000000e+00
- 2.000000000e+00,        +9.139656208e-02,        +8.396001238e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.396001238e-02,        +1.000000000e+00
- 3.000000000e+00,        +9.139656208e-02,        +8.259719511e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.259719511e-02,        +1.000000000e+00
- 4.000000000e+00,        +9.139656208e-02,        +8.854647493e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.854647493e-02,        +1.000000000e+00
- 5.000000000e+00,        +9.139656208e-02,        +8.854647490e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.854647490e-02,        +1.000000000e+00
- 6.000000000e+00,        +9.139656208e-02,        +1.197882058e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.197882058e-01,        +1.000000000e+00
- 7.000000000e+00,        +9.139656208e-02,        +1.197873702e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.197873702e-01,        +1.000000000e+00
- 8.000000000e+00,        +9.139656208e-02,        +1.133419874e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.133419874e-01,        +1.000000000e+00
- 9.000000000e+00,        +9.139656208e-02,        +8.776995268e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.776995268e-02,        +1.000000000e+00
- 1.000000000e+01,        +9.139656208e-02,        +8.776992232e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.776992232e-02,        +1.000000000e+00
- 1.100000000e+01,        +9.139656208e-02,        +8.956490411e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +8.956490411e-02,        +1.000000000e+00
- 1.200000000e+01,        +9.139656208e-02,        +1.092859937e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.092859937e-01,        +1.000000000e+00
- 1.300000000e+01,        +9.139656208e-02,        +1.092833156e-01,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +1.092833156e-01,        +1.000000000e+00
- 1.400000000e+01,        +9.139656208e-02,        +7.127501992e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +7.127501992e-02,        +1.000000000e+00
- 1.500000000e+01,        +9.139656208e-02,        +7.127536201e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +7.127536201e-02,        +1.000000000e+00
+ 1.000000000e+00,        +9.139656208e-02,        +9.139656935e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656935e-02,        +1.000000000e+00
+ 2.000000000e+00,        +9.139656208e-02,        +9.139656936e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656936e-02,        +1.000000000e+00
+ 3.000000000e+00,        +9.139656208e-02,        +9.139656929e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656929e-02,        +1.000000000e+00
+ 4.000000000e+00,        +9.139656208e-02,        +9.139656867e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656867e-02,        +1.000000000e+00
+ 5.000000000e+00,        +9.139656208e-02,        +9.139656871e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656871e-02,        +1.000000000e+00
+ 6.000000000e+00,        +9.139656208e-02,        +9.139655468e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139655468e-02,        +1.000000000e+00
+ 7.000000000e+00,        +9.139656208e-02,        +9.139654101e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139654101e-02,        +1.000000000e+00
+ 8.000000000e+00,        +9.139656208e-02,        +9.139654466e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139654466e-02,        +1.000000000e+00
+ 9.000000000e+00,        +9.139656208e-02,        +9.139656650e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656650e-02,        +1.000000000e+00
+ 1.000000000e+01,        +9.139656208e-02,        +9.139656631e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656631e-02,        +1.000000000e+00
+ 1.100000000e+01,        +9.139656208e-02,        +9.139656651e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656651e-02,        +1.000000000e+00
+ 1.200000000e+01,        +9.139656208e-02,        +9.139655368e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139655368e-02,        +1.000000000e+00
+ 1.300000000e+01,        +9.139656208e-02,        +9.139649243e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139649243e-02,        +1.000000000e+00
+ 1.400000000e+01,        +9.139656208e-02,        +9.139656049e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139656049e-02,        +1.000000000e+00
+ 1.500000000e+01,        +9.139656208e-02,        +9.139655227e-02,        +0.000000000e+00,        +0.000000000e+00,        +9.139656208e-02,        +1.000000000e+00,        +9.139655227e-02,        +1.000000000e+00
diff --git a/test/examples/ref/cylinder/floquet/error-indicators.csv b/test/examples/ref/cylinder/floquet/error-indicators.csv
index 56f2053c1..3908b367d 100644
--- a/test/examples/ref/cylinder/floquet/error-indicators.csv
+++ b/test/examples/ref/cylinder/floquet/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +3.776351128e-03,        +7.708020807e-05,        +3.998012825e-04,        +2.006231775e-04
+        +3.835705717e-03,        +7.777469844e-05,        +4.079571163e-04,        +2.034049655e-04

From 650dce37ff59bc4b43c7b50e27a16a1607d2f941 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 15:59:33 -0800
Subject: [PATCH 42/49] Fix formatting

---
 docs/src/examples/cylinder.md              |  2 +-
 docs/src/guide/boundaries.md               | 12 +--
 palace/drivers/drivensolver.cpp            |  4 +-
 palace/drivers/eigensolver.cpp             |  4 +-
 palace/fem/libceed/operator.cpp            |  2 +-
 palace/linalg/floquetcorrection.cpp        | 13 ++--
 palace/linalg/floquetcorrection.hpp        |  8 +-
 palace/linalg/solver.cpp                   | 10 +--
 palace/models/periodicboundaryoperator.cpp | 12 +--
 palace/models/spaceoperator.cpp            | 88 +++++++++++-----------
 palace/utils/configfile.cpp                |  5 +-
 palace/utils/geodata.cpp                   | 37 +++++----
 12 files changed, 103 insertions(+), 94 deletions(-)

diff --git a/docs/src/examples/cylinder.md b/docs/src/examples/cylinder.md
index 7c08fcfd4..6fe6d8ece 100644
--- a/docs/src/examples/cylinder.md
+++ b/docs/src/examples/cylinder.md
@@ -6,7 +6,7 @@
 # Eigenmodes of a Cylinder
 
 !!! note
-
+    
     The files for this example can be found in the
     [`examples/cylinder/`](https://github.com/awslabs/palace/blob/main/examples/cylinder)
     directory of the *Palace* source code.
diff --git a/docs/src/guide/boundaries.md b/docs/src/guide/boundaries.md
index edb4a3d2b..1d0a32f1f 100644
--- a/docs/src/guide/boundaries.md
+++ b/docs/src/guide/boundaries.md
@@ -83,16 +83,16 @@ incorporating periodicity as part of the meshing process.
     A lumped port applies a similar boundary condition to a
     [surface impedance](#Impedance-boundary) boundary, but takes on a special meaning for
     each simulation type.
-
+    
     For frequency domain driven simulations, ports are used to provide a lumped port
     excitation and postprocess voltages, currents, and scattering parameters. Likewise, for
     transient simulations, they perform a similar purpose but for time domain computed
     quantities.
-
+    
     For eigenmode simulations where there is no excitation, lumped ports are used to specify
     properties and postprocess energy-participation ratios (EPRs) corresponding to
     linearized circuit elements.
-
+    
     Note that a single lumped port (given by a single integer `"Index"`) can be made up of
     multiple boundary attributes in the mesh in order to model, for example, a multielement
     lumped port. To use this functionality, use the `"Elements"` object under
@@ -104,18 +104,18 @@ incorporating periodicity as part of the meshing process.
     shape which is computed by solving a 2D boundary mode eigenproblem on each wave port
     boundary. This allows for more accurate scattering parameter calculations when modeling
     waveguides or transmission lines with arbitrary cross sections.
-
+    
     The homogeneous Dirichlet boundary conditions for the wave port boundary mode analysis
     are taken from the `"PEC"` boundaries of the full 3D model, as well as any optional
     additional boundary attributes given under `"WavePortPEC"`. Any boundary of the wave
     port not labeled with with a PEC condition has the natural boundary condition for zero
     tangential magnetic field prescribed for the purpose of port mode calculation.
-
+    
     Unlike lumped ports, wave port boundaries cannot be defined internal to the
     computational domain and instead must exist only on the outer boundary of the domain
     (they are to be "one-sided" in the sense that mesh elements only exist on one side of
     the boundary).
-
+    
     Wave ports are not currently compatible with nonconformal mesh refinement.
 
 The incident field excitation at a lumped or wave port is controlled by setting
diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index 0150823f8..b9b16ba01 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -186,8 +186,8 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
       // B = -1/(iω) ∇ x E - 1/ω kp x E
       std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
       floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
-        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
-        space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
+          space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
+          space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
       floquet_corr->AddMult(E, B, -1.0 / omega);
     }
     post_op.SetEGridFunction(E);
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index b75930cc3..dcd4e1b11 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -331,8 +331,8 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
       // B = -1/(iω) ∇ x E - 1/ω kp x E.
       std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
       floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
-        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
-        space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
+          space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
+          space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
       floquet_corr->AddMult(E, B, -1.0 / omega);
     }
     post_op.SetEGridFunction(E);
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index 484757fab..2fba1f7eb 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -547,7 +547,7 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
 
   // Initialize the coarse operator.
   auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
-      fespace_coarse.GetVSize());
+                                                       fespace_coarse.GetVSize());
 
   // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
   // types, integrators) of the original fine operator.
diff --git a/palace/linalg/floquetcorrection.cpp b/palace/linalg/floquetcorrection.cpp
index 3964039ad..4431df7ad 100644
--- a/palace/linalg/floquetcorrection.cpp
+++ b/palace/linalg/floquetcorrection.cpp
@@ -18,11 +18,11 @@ namespace palace
 {
 
 template <typename VecType>
-FloquetCorrSolver<VecType>::FloquetCorrSolver(
-    const MaterialOperator &mat_op,
-    PeriodicBoundaryOperator &periodic_op,
-    FiniteElementSpace &nd_fespace, FiniteElementSpace &rt_fespace,
-    double tol, int max_it, int print)
+FloquetCorrSolver<VecType>::FloquetCorrSolver(const MaterialOperator &mat_op,
+                                              PeriodicBoundaryOperator &periodic_op,
+                                              FiniteElementSpace &nd_fespace,
+                                              FiniteElementSpace &rt_fespace, double tol,
+                                              int max_it, int print)
 {
   // Create the mass and cross product operators for Floquet correction.
   {
@@ -49,7 +49,8 @@ FloquetCorrSolver<VecType>::FloquetCorrSolver(
     std::unique_ptr<Operator> m = a.Assemble(skip_zeros);
     if constexpr (std::is_same<OperType, ComplexOperator>::value)
     {
-      Cross = std::make_unique<ComplexParOperator>(std::move(m), nullptr, nd_fespace, rt_fespace, false);
+      Cross = std::make_unique<ComplexParOperator>(std::move(m), nullptr, nd_fespace,
+                                                   rt_fespace, false);
     }
     else
     {
diff --git a/palace/linalg/floquetcorrection.hpp b/palace/linalg/floquetcorrection.hpp
index fde11db15..34f8baccc 100644
--- a/palace/linalg/floquetcorrection.hpp
+++ b/palace/linalg/floquetcorrection.hpp
@@ -50,17 +50,15 @@ class FloquetCorrSolver
   mutable VecType rhs;
 
 public:
-  FloquetCorrSolver(const MaterialOperator &mat_op,
-                    PeriodicBoundaryOperator &periodic_op,
-                    FiniteElementSpace &nd_fespace,
-                    FiniteElementSpace &rt_fespace,
+  FloquetCorrSolver(const MaterialOperator &mat_op, PeriodicBoundaryOperator &periodic_op,
+                    FiniteElementSpace &nd_fespace, FiniteElementSpace &rt_fespace,
                     double tol, int max_it, int print);
 
   // Given a vector of Nedelec dofs for an arbitrary vector field, compute
   // the Raviart-Thomas space field y = [kp x] x, where [kp x] is a matrix
   // representing the action of the cross product with the Floquet wave vector.
   void Mult(const VecType &x, VecType &y) const;
-  void AddMult(const VecType &x, VecType &y, ScalarType a=1.0) const;
+  void AddMult(const VecType &x, VecType &y, ScalarType a = 1.0) const;
 };
 
 }  // namespace palace
diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index 6c3818b92..188c14091 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -52,12 +52,12 @@ void MfemWrapperSolver<ComplexOperator>::SetOperator(const ComplexOperator &op)
   }
   if (hAr && hAi)
   {
-    //Mpi::Print("Using real coarse solve\n");
-    //A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
+    // Mpi::Print("Using real coarse solve\n");
+    // A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
     /**/
-    //Mpi::Print("Using complex coarse solve\n");
-    // A = [Ar, -Ai]
-    //     [Ai,  Ar]
+    // Mpi::Print("Using complex coarse solve\n");
+    //  A = [Ar, -Ai]
+    //      [Ai,  Ar]
     mfem::Array2D<const mfem::HypreParMatrix *> blocks(2, 2);
     mfem::Array2D<double> block_coeffs(2, 2);
     blocks(0, 0) = hAr;
diff --git a/palace/models/periodicboundaryoperator.cpp b/palace/models/periodicboundaryoperator.cpp
index ff9b6a994..91854d0c3 100644
--- a/palace/models/periodicboundaryoperator.cpp
+++ b/palace/models/periodicboundaryoperator.cpp
@@ -37,7 +37,8 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
   {
     MFEM_VERIFY(data.wave_vector.size() == sdim,
                 "Floquet wave vector size must equal the spatial dimension.");
-    std::copy(data.wave_vector.begin(), data.wave_vector.end(), local_wave_vector.GetData());
+    std::copy(data.wave_vector.begin(), data.wave_vector.end(),
+              local_wave_vector.GetData());
     wave_vector += local_wave_vector;
   }
   non_zero_wave_vector = (wave_vector.Norml2() > tol);
@@ -52,7 +53,8 @@ PeriodicBoundaryOperator::PeriodicBoundaryOperator(const IoData &iodata,
     mfem::Vector diff(sdim);
     diff = wave_vector;
     diff -= local_wave_vector;
-    MFEM_VERIFY(diff.Norml2() < tol, "Conflicting definitions of the Floquet wave vector in the "
+    MFEM_VERIFY(diff.Norml2() < tol,
+                "Conflicting definitions of the Floquet wave vector in the "
                 "configuration file.");
     wave_vector = local_wave_vector;
   }
@@ -239,7 +241,7 @@ void PeriodicBoundaryOperator::AddCurlCoefficients(double coeff,
 }
 
 void PeriodicBoundaryOperator::AddFloquetCrossCoefficients(double coeff,
-                                                       MaterialPropertyCoefficient &f)
+                                                           MaterialPropertyCoefficient &f)
 {
   if (non_zero_wave_vector)
   {
@@ -252,8 +254,8 @@ void PeriodicBoundaryOperator::AddFloquetCrossCoefficients(double coeff,
       kx(k) = wave_vector_cross;
     }
     MaterialPropertyCoefficient kx_func(mat_op.GetAttributeToMaterial(), kx);
-    f.AddCoefficient(kx_func.GetAttributeToMaterial(),
-                     kx_func.GetMaterialProperties(), coeff);
+    f.AddCoefficient(kx_func.GetAttributeToMaterial(), kx_func.GetMaterialProperties(),
+                     coeff);
   }
 }
 
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index f141e6418..e9d7c3fd3 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -255,8 +255,7 @@ void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df,
 }
 
 void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
-                       const MaterialPropertyCoefficient *fb,
-                       bool assemble_q_data = false)
+                       const MaterialPropertyCoefficient *fb, bool assemble_q_data = false)
 {
   if (f && !f->empty())
   {
@@ -272,24 +271,28 @@ void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f,
   }
 }
 
-auto AssembleOperator(
-    const FiniteElementSpace &fespace, const MaterialPropertyCoefficient *df,
-    const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
-    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpw,
-    const MaterialPropertyCoefficient *fp,
-    bool skip_zeros = false, bool assemble_q_data = false)
+auto AssembleOperator(const FiniteElementSpace &fespace,
+                      const MaterialPropertyCoefficient *df,
+                      const MaterialPropertyCoefficient *f,
+                      const MaterialPropertyCoefficient *dfb,
+                      const MaterialPropertyCoefficient *fb,
+                      const MaterialPropertyCoefficient *fpw,
+                      const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
+                      bool assemble_q_data = false)
 {
   BilinearForm a(fespace);
   AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
   return a.Assemble(skip_zeros);
 }
 
-auto AssembleOperators(
-    const FiniteElementSpaceHierarchy &fespaces, const MaterialPropertyCoefficient *df,
-    const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
-    const MaterialPropertyCoefficient *fb, const MaterialPropertyCoefficient *fpw,
-    const MaterialPropertyCoefficient *fp,
-    bool skip_zeros = false, bool assemble_q_data = false, std::size_t l0 = 0)
+auto AssembleOperators(const FiniteElementSpaceHierarchy &fespaces,
+                       const MaterialPropertyCoefficient *df,
+                       const MaterialPropertyCoefficient *f,
+                       const MaterialPropertyCoefficient *dfb,
+                       const MaterialPropertyCoefficient *fb,
+                       const MaterialPropertyCoefficient *fpw,
+                       const MaterialPropertyCoefficient *fp, bool skip_zeros = false,
+                       bool assemble_q_data = false, std::size_t l0 = 0)
 {
   BilinearForm a(fespaces.GetFinestFESpace());
   AddIntegrators(a, df, f, dfb, fb, fpw, fp, assemble_q_data);
@@ -324,8 +327,8 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr,
-                            skip_zeros);
+  auto k =
+      AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, nullptr, nullptr, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto K = std::make_unique<ComplexParOperator>(std::move(k), nullptr, GetNDSpace());
@@ -356,8 +359,8 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
     return {};
   }
   constexpr bool skip_zeros = false;
-  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr,
-                            nullptr, skip_zeros);
+  auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, nullptr, nullptr,
+                            skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto C = std::make_unique<ComplexParOperator>(std::move(c), nullptr, GetNDSpace());
@@ -394,13 +397,13 @@ std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy
   std::unique_ptr<Operator> mr, mi;
   if (!empty[0])
   {
-    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr,
-                          nullptr, skip_zeros);
+    mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, nullptr, nullptr,
+                          skip_zeros);
   }
   if (!empty[1])
   {
-    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr,
-                          nullptr, skip_zeros);
+    mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, nullptr, nullptr,
+                          skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -436,13 +439,13 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   std::unique_ptr<Operator> ar, ai;
   if (!empty[0])
   {
-    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr,
-                          nullptr, skip_zeros);
+    ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, nullptr, nullptr,
+                          skip_zeros);
   }
   if (!empty[1])
   {
-    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr,
-                          nullptr, skip_zeros);
+    ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, nullptr, nullptr,
+                          skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -465,8 +468,8 @@ std::unique_ptr<OperType>
 SpaceOperator::GetFloquetMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fm(mat_op.MaxCeedAttribute()),
-      fwc(mat_op.MaxCeedAttribute()), fc(mat_op.MaxCeedAttribute());
+  MaterialPropertyCoefficient fm(mat_op.MaxCeedAttribute()), fwc(mat_op.MaxCeedAttribute()),
+      fc(mat_op.MaxCeedAttribute());
   AddPeriodicCoefficients(1.0, fm, fwc, fc);
   int empty[2] = {(fm.empty()), (fwc.empty() && fc.empty())};
   Mpi::GlobalMin(2, empty, GetComm());
@@ -478,13 +481,13 @@ SpaceOperator::GetFloquetMatrix(Operator::DiagonalPolicy diag_policy)
   std::unique_ptr<Operator> pr, pi;
   if (!empty[0])
   {
-    pr = AssembleOperator(GetNDSpace(), nullptr, &fm, nullptr, nullptr, nullptr,
-                          nullptr, skip_zeros);
+    pr = AssembleOperator(GetNDSpace(), nullptr, &fm, nullptr, nullptr, nullptr, nullptr,
+                          skip_zeros);
   }
   if (!empty[1])
   {
-    pi = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fwc,
-                          &fc, skip_zeros);
+    pi = AssembleOperator(GetNDSpace(), nullptr, nullptr, nullptr, nullptr, &fwc, &fc,
+                          skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -819,15 +822,15 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     {
       br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr,
                                  skip_zeros, assemble_q_data);
-      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr,
-                                        skip_zeros, assemble_q_data);
+      br_aux_vec =
+          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
     }
     if (!empty[1])
     {
       bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, &fpwi, &fpi,
                                  skip_zeros, assemble_q_data);
-      bi_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fi, &fbi,
-                                        &skip_zeros, assemble_q_data);
+      bi_aux_vec =
+          AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, skip_zeros, assemble_q_data);
     }
   }
   else
   else
@@ -844,15 +847,15 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
     AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr);
     AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr);
     AddPeriodicCoefficients(1.0, fr, fpwr, fpr);
-    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() &&
-                 fpwr.empty() && fpr.empty());
+    int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty() && fpwr.empty() &&
+                 fpr.empty());
     Mpi::GlobalMin(1, &empty, GetComm());
     if (!empty)
     {
-      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr,  &fpwr, &fpr,
+      br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, &fpwr, &fpr,
                                  skip_zeros, assemble_q_data);
-      br_aux_vec = AssembleAuxOperators(GetH1Spaces(), &fr, &fbr,
-                                        skip_zeros, assemble_q_data);
+      br_aux_vec =
+          AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data);
     }
   }
 
@@ -984,8 +987,7 @@ void SpaceOperator::AddExtraSystemBdrCoefficients(double omega,
   wave_port_op.AddExtraSystemBdrCoefficients(omega, fbr, fbi);
 }
 
-void SpaceOperator::AddPeriodicCoefficients(double coeff,
-                                            MaterialPropertyCoefficient &fm,
+void SpaceOperator::AddPeriodicCoefficients(double coeff, MaterialPropertyCoefficient &fm,
                                             MaterialPropertyCoefficient &fwc,
                                             MaterialPropertyCoefficient &fc)
 {
diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp
index 578d972ae..5c95c1854 100644
--- a/palace/utils/configfile.cpp
+++ b/palace/utils/configfile.cpp
@@ -1095,8 +1095,9 @@ void PeriodicBoundaryData::SetUp(json &boundaries)
     auto floquet = it->find("FloquetWaveVector");
     if (floquet != it->end())
     {
-      MFEM_VERIFY(floquet->is_array(),
-                "\"FloquetWaveVector\" should specify an array in the configuration file!");
+      MFEM_VERIFY(
+          floquet->is_array(),
+          "\"FloquetWaveVector\" should specify an array in the configuration file!");
       data.wave_vector = floquet->get<std::array<double, 3>>();
     }
 
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index 509b1f6f0..b682d4e63 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -1826,8 +1826,7 @@ std::vector<mfem::Vector> FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
                                            const std::unordered_set<int> &vertidxs,
                                            const mfem::Vector &centroid,
                                            const mfem::Vector &normal,
-                                           const double &mesh_dim,
-                                           const double &tol = 1e-6)
+                                           const double &mesh_dim, const double &tol = 1e-6)
 {
   const int sdim = mesh->SpaceDimension();
   std::vector<mfem::Vector> unique_pts;
@@ -1854,7 +1853,7 @@ std::vector<mfem::Vector> FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
     {
       int v = *pts_set.begin();
       coord = mesh->GetVertex(v);
-      unique_pts.push_back(coord); // Add point.
+      unique_pts.push_back(coord);  // Add point.
       // Once we have 3 points, check for collinearity
       if (unique_pts.size() == 3)
       {
@@ -1865,7 +1864,8 @@ std::vector<mfem::Vector> FindUniquePoints(std::unique_ptr<mfem::Mesh> &mesh,
         v2 = unique_pts[2];
         v2 -= unique_pts[0];
         v1.cross3D(v2, cross_product);
-        // If cross product is ~0, points are collinear. Remove last point and continue loop.
+        // If cross product is ~0, points are collinear. Remove last point and continue
+        // loop.
         if (cross_product.Norml2() < tol)
         {
           unique_pts.pop_back();
@@ -2039,9 +2039,10 @@ std::vector<int> CreatePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
 // Uses the translation vector or affine transformation matrix specified in the
 // configuration file. If not provided, attempts to automatically detect the
 // affine transformation between donor and receiver boundary vertices.
-std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
-                                                const struct palace::config::PeriodicData &data,
-                                                const double tol = 1e-8)
+std::vector<int>
+DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
+                               const struct palace::config::PeriodicData &data,
+                               const double tol = 1e-8)
 {
   // Get mesh dimensions, will be used to define a reasonable tolerance in mesh units.
   const int sdim = mesh->SpaceDimension();
@@ -2094,8 +2095,8 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
   }
 
   MFEM_VERIFY(bdr_v_donor.size() == bdr_v_receiver.size(),
-    "Different number of "
-    "vertices on donor and receiver boundaries. Cannot create periodic mesh.");
+              "Different number of "
+              "vertices on donor and receiver boundaries. Cannot create periodic mesh.");
 
   // How to check if the mesh is OK?
   // Count number of elems in the periodic direction?
@@ -2125,8 +2126,8 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
   else
   {
     // No tets
-     MFEM_VERIFY(mesh->GetNE() > num_periodic_bc_elems,
-                 "Not enough mesh elements in periodic direction!");
+    MFEM_VERIFY(mesh->GetNE() > num_periodic_bc_elems,
+                "Not enough mesh elements in periodic direction!");
   }
 
   // Determine the affine transformation between donor and receiver points.
@@ -2175,10 +2176,13 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
 
     // Compute a set of unique points for each boundary.
     std::vector<mfem::Vector> donor_pts, receiver_pts;
-    donor_pts = FindUniquePoints(mesh, bdr_v_donor, donor_centroid, donor_normal, mesh_dim, mesh_tol);
-    receiver_pts = FindUniquePoints(mesh, bdr_v_receiver, receiver_centroid, receiver_normal, mesh_dim, mesh_tol);
-    MFEM_VERIFY(donor_pts.size() == receiver_pts.size(),
-                "Different number of unique points on donor and receiver periodic boundaries.");
+    donor_pts = FindUniquePoints(mesh, bdr_v_donor, donor_centroid, donor_normal, mesh_dim,
+                                 mesh_tol);
+    receiver_pts = FindUniquePoints(mesh, bdr_v_receiver, receiver_centroid,
+                                    receiver_normal, mesh_dim, mesh_tol);
+    MFEM_VERIFY(
+        donor_pts.size() == receiver_pts.size(),
+        "Different number of unique points on donor and receiver periodic boundaries.");
 
     // With 4 pairs of matching points, compute the unique affine transformation.
     // With < 4, cannot determine a unique transformation. We assume there is no
@@ -2201,7 +2205,8 @@ std::vector<int> DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mes
     }
   }
 
-  return CreatePeriodicVertexMapping(mesh, bdr_v_donor, bdr_v_receiver, transformation, mesh_tol);
+  return CreatePeriodicVertexMapping(mesh, bdr_v_donor, bdr_v_receiver, transformation,
+                                     mesh_tol);
 }
 
 std::unique_ptr<mfem::Mesh> LoadMesh(const std::string &mesh_file, bool remove_curvature,

From 2e2ae18430f57ccf7da61b0543af0f9176aacce1 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 16:38:42 -0800
Subject: [PATCH 43/49] Update CHANGELOG

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 104794915..7e721b218 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,6 +45,9 @@ The format of this changelog is based on
   - Added adaptive time-stepping capability for transient simulations. The new ODE integrators
     rely on the SUNDIALS library and can be specified by setting the
     `config["Solver"]["Transient"]["Type"]` option to `"CVODE"` or `"ARKODE"`.
+  - Added support for Floquet periodic boundary conditions with phase-delay constraints.
+    The Floquet wave vector can be specified along with periodic boundaries in the
+    `config["Boundaries"]["Periodic"]` configuration.
 
 ## [0.13.0] - 2024-05-20
 

From 102386dd0a0af325eb70b8f8672ce507b9677dd3 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Thu, 12 Dec 2024 16:38:55 -0800
Subject: [PATCH 44/49] Remove prints

---
 palace/utils/geodata.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index b682d4e63..e1b8c64dc 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -2107,8 +2107,6 @@ DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
   // MOVE THIS TEST SOMEWHERE ELSE. IT SHOULD ALSO APPLY TO MESHES
   // ALREADY CREATED WITH PERIODICITY!!!
   const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
-  Mpi::Print("Total number of elements: {:d}\n", mesh->GetNE());
-  Mpi::Print("Number of periodic BC elements: {:d}\n", num_periodic_bc_elems);
   mfem::Array<mfem::Geometry::Type> geoms;
   mesh->GetGeometries(3, geoms);
   if (geoms.Size() == 1 && geoms[0] == mfem::Geometry::TETRAHEDRON)

From 7a1d90eff45c685f691a032ff9b7250f9535b3fa Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 13 Dec 2024 08:02:10 -0800
Subject: [PATCH 45/49] Initialize Floquet correction solver outside of
 frequency loop

---
 palace/drivers/drivensolver.cpp | 13 +++++++++----
 palace/drivers/eigensolver.cpp  | 13 +++++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index b9b16ba01..1b5dfcdfd 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -148,6 +148,15 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
       iodata.solver.linear.estimator_mg);
   ErrorIndicator indicator;
 
+  // If using Floquet BCs, a correction term (kp x E) needs to be added to the B field.
+  std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
+  if (FP)
+  {
+    floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
+        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
+        space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
+  }
+
   // Main frequency sweep loop.
   int step = step0;
   double omega = omega0;
@@ -184,10 +193,6 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &space_op, PostOperator
     {
       // Calculate B field correction for Floquet BCs.
       // B = -1/(iω) ∇ x E - 1/ω kp x E
-      std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
-      floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
-          space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
-          space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
       floquet_corr->AddMult(E, B, -1.0 / omega);
     }
     post_op.SetEGridFunction(E);
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index dcd4e1b11..f30514612 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -184,6 +184,15 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     eigen->SetDivFreeProjector(*divfree);
   }
 
+  // If using Floquet BCs, a correction term (kp x E) needs to be added to the B field.
+  std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
+  if (FP)
+  {
+    floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
+        space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
+        space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
+  }
+
   // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is
   // projected appropriately.
   if (iodata.solver.eigenmode.init_v0)
@@ -329,10 +338,6 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     {
       // Calculate B field correction for Floquet BCs.
       // B = -1/(iω) ∇ x E - 1/ω kp x E.
-      std::unique_ptr<FloquetCorrSolver<ComplexVector>> floquet_corr;
-      floquet_corr = std::make_unique<FloquetCorrSolver<ComplexVector>>(
-          space_op.GetMaterialOp(), space_op.GetPeriodicOp(), space_op.GetNDSpace(),
-          space_op.GetRTSpace(), iodata.solver.linear.tol, iodata.solver.linear.max_it, 0);
       floquet_corr->AddMult(E, B, -1.0 / omega);
     }
     post_op.SetEGridFunction(E);

From 14c8ce30efae2e68d2d31650d4664b72cff8865a Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 13 Dec 2024 08:04:32 -0800
Subject: [PATCH 46/49] Remove unnecessary comments

---
 palace/utils/geodata.cpp | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index e1b8c64dc..461df52c1 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -2098,14 +2098,8 @@ DeterminePeriodicVertexMapping(std::unique_ptr<mfem::Mesh> &mesh,
               "Different number of "
               "vertices on donor and receiver boundaries. Cannot create periodic mesh.");
 
-  // How to check if the mesh is OK?
-  // Count number of elems in the periodic direction?
-  // If hex/prism: Count boundary elements on donor+receiver,
-  // if total NE = ndonorE+nReceiverE: not enough cells?
-  // If pure tet mesh NE = 3*(ndonorE+nreceiverE): not enough
-  // Mixed mesh is trickier
-  // MOVE THIS TEST SOMEWHERE ELSE. IT SHOULD ALSO APPLY TO MESHES
-  // ALREADY CREATED WITH PERIODICITY!!!
+  // Check that the mesh has enough elements in the periodic direction. MFEM's
+  // periodicity fails for meshes with <=2 elements in the periodic direction.
   const int num_periodic_bc_elems = bdr_e_donor.size() + bdr_e_receiver.size();
   mfem::Array<mfem::Geometry::Type> geoms;
   mesh->GetGeometries(3, geoms);

From 7259f7c5ab0360069603683789d6264fff0617ab Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 13 Dec 2024 08:25:32 -0800
Subject: [PATCH 47/49] Update schema for Floquet BCs

---
 scripts/schema/config/boundaries.json | 30 +++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/scripts/schema/config/boundaries.json b/scripts/schema/config/boundaries.json
index ec563245c..3dff3c815 100644
--- a/scripts/schema/config/boundaries.json
+++ b/scripts/schema/config/boundaries.json
@@ -231,15 +231,25 @@
       {
         "type": "object",
         "additionalProperties": false,
-        "required": ["DonorAttributes", "ReceiverAttributes", "Translation"],
+        "required": ["DonorAttributes", "ReceiverAttributes"],
         "properties":
         {
           "DonorAttributes": { "$ref": "#/$defs/DonorAttributes" },
           "ReceiverAttributes": { "$ref": "#/$defs/ReceiverAttributes" },
-          "Translation": { "$ref": "#/$defs/Translation" }
+          "Translation": { "$ref": "#/$defs/Translation" },
+          "AffineTransformation": { "$ref": "#/$defs/AffineTransformation" },
+          "FloquetWaveVector": { "$ref": "#/$defs/FloquetWaveVector" }
         }
       }
     },
+    "FloquetWaveVector":
+    {
+      "type": "array",
+      "additionalItems": false,
+      "items": { "type": "number" },
+      "minItems": 3,
+      "maxItems": 3
+    },
     "Postprocessing":
     {
       "type": "object",
@@ -327,6 +337,22 @@
       "minItems": 3,
       "maxItems": 3
     },
+    "AffineTransformation":
+    {
+      "type": "array",
+      "additionalItems": false,
+      "items": { "type": "number" },
+      "minItems": 16,
+      "maxItems": 16
+    },
+    "FloquetWaveVector":
+    {
+      "type": "array",
+      "additionalItems": false,
+      "items": { "type": "number" },
+      "minItems": 3,
+      "maxItems": 3
+    },
     "Direction":
     {
       "anyOf":

From 4ea97e04254d2d1adcf0aeccafb6d9a4854f4085 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 13 Dec 2024 09:39:43 -0800
Subject: [PATCH 48/49] Fix bug assuming symmetric coefficients

---
 palace/fem/qfunctions/2/h1_2_qf.h | 4 ++--
 palace/fem/qfunctions/2/l2_2_qf.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/palace/fem/qfunctions/2/h1_2_qf.h b/palace/fem/qfunctions/2/h1_2_qf.h
index 4185e8624..348b7a3a4 100644
--- a/palace/fem/qfunctions/2/h1_2_qf.h
+++ b/palace/fem/qfunctions/2/h1_2_qf.h
@@ -19,8 +19,8 @@ CEED_QFUNCTION(f_apply_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
     const CeedScalar u0 = u[i + Q * 0];
     const CeedScalar u1 = u[i + Q * 1];
-    v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1);
-    v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[2] * u1);
+    v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[2] * u1);
+    v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[3] * u1);
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/2/l2_2_qf.h b/palace/fem/qfunctions/2/l2_2_qf.h
index 8407fef1b..e1536e04f 100644
--- a/palace/fem/qfunctions/2/l2_2_qf.h
+++ b/palace/fem/qfunctions/2/l2_2_qf.h
@@ -20,8 +20,8 @@ CEED_QFUNCTION(f_apply_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar
 
     const CeedScalar u0 = u[i + Q * 0];
     const CeedScalar u1 = u[i + Q * 1];
-    v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1);
-    v[i + Q * 1] = w * (coeff[1] * u0 + coeff[2] * u1);
+    v[i + Q * 0] = w * (coeff[0] * u0 + coeff[2] * u1);
+    v[i + Q * 1] = w * (coeff[1] * u0 + coeff[3] * u1);
   }
   return 0;
 }

From d72f40d5912784dd77a60ad5c20d6122dcabe507 Mon Sep 17 00:00:00 2001
From: Simon Lapointe <simlap@amazon.com>
Date: Fri, 13 Dec 2024 09:55:39 -0800
Subject: [PATCH 49/49] Update waveguide example

---
 examples/cylinder/waveguide.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cylinder/waveguide.json b/examples/cylinder/waveguide.json
index e24de9ab6..c69dcc47a 100644
--- a/examples/cylinder/waveguide.json
+++ b/examples/cylinder/waveguide.json
@@ -39,7 +39,7 @@
       {
         "DonorAttributes": [2],
         "ReceiverAttributes": [3],
-        "Translation": [0.0, 0.0, 5.48] // in L0 units
+        "Translation": [0.0, 0.0, -5.48] // in L0 units
       }
     ],
     "PEC":