diff --git a/.gitmodules b/.gitmodules
index b58b1dcf..58f055d6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,6 @@
 [submodule "externals/YAKL"]
 	path = externals/YAKL
 	url = git@github.com:mrnorman/YAKL.git
-[submodule "externals/eigen"]
-	path = externals/eigen
-	url = https://gitlab.com/libeigen/eigen.git
 [submodule "externals/yaml-cpp"]
 	path = externals/yaml-cpp
 	url = git@github.com:jbeder/yaml-cpp.git
diff --git a/dynamics/CMakeLists.txt b/dynamics/CMakeLists.txt
index 4c037c48..cd75f1bd 100644
--- a/dynamics/CMakeLists.txt
+++ b/dynamics/CMakeLists.txt
@@ -4,13 +4,14 @@ add_library(dynamics INTERFACE)
 #Choose dycore
 if     ("${PAM_DYCORE}" STREQUAL "awfl")
   add_subdirectory(awfl ./dycore)
-elseif ("${PAM_DYCORE}" STREQUAL "spam++")
+  target_compile_definitions(dynamics INTERFACE PAM_DYCORE_AWFL)
+elseif ("${PAM_DYCORE}" STREQUAL "spam")
   add_subdirectory(spam ./dycore)
 else ()
   if ("${PAM_DYCORE}" STREQUAL "")
-    message(FATAL_ERROR "ERROR: You must specify -DPAM_DYCORE=[awfl,spam++] in your CMake configure line.")
+    message(FATAL_ERROR "ERROR: You must specify -DPAM_DYCORE=[awfl,spam] in your CMake configure line.")
   else ()
-    message(FATAL_ERROR "ERROR: \"${PAM_DYCORE}\" is an invalid PAM_DYCORE CMake option. Valid options include \"awfl\",\"spam++\".")
+    message(FATAL_ERROR "ERROR: \"${PAM_DYCORE}\" is an invalid PAM_DYCORE CMake option. Valid options include \"awfl\",\"spam\".")
   endif()
 endif()
 
diff --git a/dynamics/awfl/Dycore.h b/dynamics/awfl/Dycore.h
index ee76eba2..0245683a 100644
--- a/dynamics/awfl/Dycore.h
+++ b/dynamics/awfl/Dycore.h
@@ -1,10 +1,1551 @@
 
-#include "Spatial_expl.h"
-#include "Temporal_ader.h"
+#pragma once
 
-// Define the Spatial operator based on constants from the Temporal operatora header file
-typedef Spatial_operator<nTimeDerivs,timeAvg,nAder> Spatial;
+#include "awfl_const.h"
+#include "TransformMatrices.h"
+#include "TransformMatrices_variable.h"
+#include "WenoLimiter.h"
+#include "idealized_profiles.h"
+#include "MultipleFields.h"
+#include "pam_coupler.h"
 
-// Define the Temporal operator based on the Spatial operator
-typedef Temporal_operator<Spatial> Dycore;
+
+class Dycore {
+  public:
+
+  // Order of accuracy (numerical convergence for smooth flows) for the dynamical core
+  #ifndef MW_ORD
+    int  static constexpr ord = 5;
+  #else
+    int  static constexpr ord = MW_ORD;
+  #endif
+  // This is one value higher than normal to facilitate removing the Riemann solver phase
+  int  static constexpr hs  = (ord+1)/2; // Number of halo cells ("hs" == "halo size")
+  int  static constexpr num_state = 5;
+
+  // IDs for the variables in the state vector
+  int  static constexpr idR = 0;  // Density
+  int  static constexpr idU = 1;  // u-momentum
+  int  static constexpr idV = 2;  // v-momentum
+  int  static constexpr idW = 3;  // w-momentum
+  int  static constexpr idT = 4;  // Density * potential temperature
+
+  SArray<real,3,hs,hs,hs> weno_recon_lower;
+
+
+
+  real2d compute_mass( pam::PamCoupler const &coupler , realConst5d state , realConst5d tracers ) const {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    auto num_tracers = coupler.get_num_tracers();
+    auto nens        = coupler.get_nens();
+    auto nx          = coupler.get_nx();
+    auto ny          = coupler.get_ny();
+    auto nz          = coupler.get_nz();
+    auto dz          = coupler.get_data_manager_device_readonly().get<real const,2>("vertical_cell_dz");
+    int num_vars = num_tracers+2;
+    realHost2d mass("mass",num_vars,nens);
+    for (int ivar=0; ivar < num_vars; ivar++) {
+      for (int iens=0; iens < nens; iens++) {
+        real3d tmp("tmp",nz,ny,nx);
+        parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<3>(nz,ny,nx) , YAKL_LAMBDA (int k, int j, int i) {
+          if      (ivar <  num_tracers  ) { tmp(k,j,i) = tracers(ivar,hs+k,hs+j,hs+i,iens)*dz(k,iens); }
+          else if (ivar == num_tracers  ) { tmp(k,j,i) = state  (idR ,hs+k,hs+j,hs+i,iens)*dz(k,iens); }
+          else if (ivar == num_tracers+1) { tmp(k,j,i) = state  (idT ,hs+k,hs+j,hs+i,iens)*dz(k,iens); }
+        });
+        mass(ivar,iens) = yakl::intrinsics::sum(tmp)/tmp.size();
+      }
+    }
+    return mass.createDeviceCopy();
+  }
+
+
+
+  // Compute the maximum stable time step using very conservative assumptions about max wind speed
+  real compute_time_step( pam::PamCoupler const &coupler , real cfl = 0.8 ) const {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    auto nens     = coupler.get_nens();
+    auto nx       = coupler.get_nx();
+    auto ny       = coupler.get_ny();
+    auto nz       = coupler.get_nz();
+    auto dx       = coupler.get_dx();
+    auto dy       = coupler.get_dy();
+    auto R_d      = coupler.get_option<real>("R_d");
+    auto R_v      = coupler.get_option<real>("R_v");
+    auto gamma_d  = coupler.get_option<real>("gamma_d");
+    auto &dm      = coupler.get_data_manager_device_readonly();
+    auto dm_rho_d = dm.get<real const,4>("density_dry");
+    auto dm_uvel  = dm.get<real const,4>("uvel"       );
+    auto dm_vvel  = dm.get<real const,4>("vvel"       );
+    auto dm_wvel  = dm.get<real const,4>("wvel"       );
+    auto dm_temp  = dm.get<real const,4>("temp"       );
+    auto dm_rho_v = dm.get<real const,4>("water_vapor");
+    auto dz       = dm.get<real const,2>("vertical_cell_dz");
+    real4d dt4d("dt4d",nz,ny,nx,nens);
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      real rho_d = dm_rho_d(k,j,i,iens);
+      real u     = dm_uvel (k,j,i,iens);
+      real v     = dm_vvel (k,j,i,iens);
+      real w     = dm_wvel (k,j,i,iens);
+      real temp  = dm_temp (k,j,i,iens);
+      real rho_v = dm_rho_v(k,j,i,iens);
+      real rho   = rho_d + rho_v;
+      real p = ( rho_d*R_d + rho_v*R_v ) * temp;
+      real cs = sqrt(gamma_d*p/rho);
+      real dtx = cfl * dx         / (std::abs(u)+cs);
+      real dty = cfl * dy         / (std::abs(v)+cs);
+      real dtz = cfl * dz(k,iens) / (std::abs(w)+cs);
+      dt4d(k,j,i,iens) = std::min( std::min( dtx , dty ) , dtz );
+    });
+    return yakl::intrinsics::minval( dt4d );
+  }
+
+
+
+  // Perform a single time step using SSPRK3 time stepping
+  void timeStep(pam::PamCoupler &coupler) {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    using yakl::intrinsics::maxval;
+    using yakl::intrinsics::abs;
+
+    auto num_tracers     = coupler.get_num_tracers();
+    auto nens            = coupler.get_nens();
+    auto nx              = coupler.get_nx();
+    auto ny              = coupler.get_ny();
+    auto nz              = coupler.get_nz();
+    auto tracer_positive = coupler.get_data_manager_device_readonly().get<bool const,1>("tracer_positive");
+
+    real dt_phys = coupler.get_option<real>("crm_dt");
+
+    // Create arrays to hold state and tracers with halos on the left and right of the domain
+    // Cells [0:hs-1] are the left halos, and cells [nx+hs:nx+2*hs-1] are the right halos
+    real5d state  ("state"  ,num_state  ,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+    real5d tracers("tracers",num_tracers,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+
+    // Populate the state and tracers arrays using data from the coupler, convert to the dycore's desired state
+    convert_coupler_to_dynamics( coupler , state , tracers );
+
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<5>(num_tracers,nz,ny,nx,nens) , YAKL_LAMBDA (int l, int k, int j, int i, int iens) {
+      if (tracer_positive(l)) {
+        tracers(l,hs+k,hs+j,hs+i,iens) = std::max( 0._fp , tracers(l,hs+k,hs+j,hs+i,iens) );
+      }
+    });
+
+    #ifdef PAM_DEBUG
+      auto mass_init = compute_mass( coupler , state , tracers );
+    #endif
+
+    // Get the max stable time step for the dynamics. dt_phys might be > dt_dyn, meaning we would need to sub-cycle
+    real dt_dyn = compute_time_step( coupler );
+
+    // Get the number of sub-cycles we need, and set the dynamics time step accordingly
+    int ncycles = (int) std::ceil( dt_phys / dt_dyn );
+    dt_dyn = dt_phys / ncycles;
+
+    for (int icycle = 0; icycle < ncycles; icycle++) {
+      // SSPRK3 requires temporary arrays to hold intermediate state and tracers arrays
+      real5d state_tmp   ("state_tmp"   ,num_state  ,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+      real5d state_tend  ("state_tend"  ,num_state  ,nz     ,ny     ,nx     ,nens);
+      real5d tracers_tmp ("tracers_tmp" ,num_tracers,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+      real5d tracers_tend("tracers_tend",num_tracers,nz     ,ny     ,nx     ,nens);
+      //////////////
+      // Stage 1
+      //////////////
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<5>(num_tracers,nz,ny,nx,nens) , YAKL_LAMBDA (int l, int k, int j, int i, int iens) {
+        // Store the starting point for FCT positivity in the next stage
+        tracers_tend(l,k,j,i,iens) = tracers(l,hs+k,hs+j,hs+i,iens);
+      });
+      compute_tendencies( coupler , state     , state_tend , tracers     , tracers_tend , dt_dyn );
+      // Apply tendencies
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        for (int l = 0; l < num_state  ; l++) {
+          state_tmp  (l,hs+k,hs+j,hs+i,iens) = state  (l,hs+k,hs+j,hs+i,iens) + dt_dyn * state_tend  (l,k,j,i,iens);
+        }
+        for (int l = 0; l < num_tracers; l++) {
+          tracers_tmp(l,hs+k,hs+j,hs+i,iens) = tracers(l,hs+k,hs+j,hs+i,iens) + dt_dyn * tracers_tend(l,k,j,i,iens);
+          // For machine precision negative values after FCT-enforced positivity application
+          if (tracer_positive(l)) {
+            tracers_tmp(l,hs+k,hs+j,hs+i,iens) = std::max( 0._fp , tracers_tmp(l,hs+k,hs+j,hs+i,iens) );
+          }
+          // Store the starting point for FCT positivity in the next stage
+          tracers_tend(l,k,j,i,iens) = (3._fp/4._fp) * tracers    (l,hs+k,hs+j,hs+i,iens) + 
+                                       (1._fp/4._fp) * tracers_tmp(l,hs+k,hs+j,hs+i,iens);
+        }
+      });
+      //////////////
+      // Stage 2
+      //////////////
+      compute_tendencies( coupler , state_tmp , state_tend , tracers_tmp , tracers_tend , (1._fp/4._fp) * dt_dyn );
+      // Apply tendencies
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        for (int l = 0; l < num_state  ; l++) {
+          state_tmp  (l,hs+k,hs+j,hs+i,iens) = (3._fp/4._fp) * state      (l,hs+k,hs+j,hs+i,iens) + 
+                                               (1._fp/4._fp) * state_tmp  (l,hs+k,hs+j,hs+i,iens) +
+                                               (1._fp/4._fp) * dt_dyn * state_tend  (l,k,j,i,iens);
+        }
+        for (int l = 0; l < num_tracers; l++) {
+          tracers_tmp(l,hs+k,hs+j,hs+i,iens) = (3._fp/4._fp) * tracers    (l,hs+k,hs+j,hs+i,iens) + 
+                                               (1._fp/4._fp) * tracers_tmp(l,hs+k,hs+j,hs+i,iens) +
+                                               (1._fp/4._fp) * dt_dyn * tracers_tend(l,k,j,i,iens);
+          // For machine precision negative values after FCT-enforced positivity application
+          if (tracer_positive(l)) {
+            tracers_tmp(l,hs+k,hs+j,hs+i,iens) = std::max( 0._fp , tracers_tmp(l,hs+k,hs+j,hs+i,iens) );
+          }
+          // Store the starting point for FCT positivity in the next stage
+          tracers_tend(l,k,j,i,iens) = (1._fp/3._fp) * tracers    (l,hs+k,hs+j,hs+i,iens) +
+                                       (2._fp/3._fp) * tracers_tmp(l,hs+k,hs+j,hs+i,iens);
+        }
+      });
+      //////////////
+      // Stage 3
+      //////////////
+      compute_tendencies( coupler , state_tmp , state_tend , tracers_tmp , tracers_tend , (2._fp/3._fp) * dt_dyn );
+      // Apply tendencies
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        for (int l = 0; l < num_state  ; l++) {
+          state      (l,hs+k,hs+j,hs+i,iens) = (1._fp/3._fp) * state      (l,hs+k,hs+j,hs+i,iens) +
+                                               (2._fp/3._fp) * state_tmp  (l,hs+k,hs+j,hs+i,iens) +
+                                               (2._fp/3._fp) * dt_dyn * state_tend  (l,k,j,i,iens);
+        }
+        for (int l = 0; l < num_tracers; l++) {
+          tracers    (l,hs+k,hs+j,hs+i,iens) = (1._fp/3._fp) * tracers    (l,hs+k,hs+j,hs+i,iens) +
+                                               (2._fp/3._fp) * tracers_tmp(l,hs+k,hs+j,hs+i,iens) +
+                                               (2._fp/3._fp) * dt_dyn * tracers_tend(l,k,j,i,iens);
+          // For machine precision negative values after FCT-enforced positivity application
+          if (tracer_positive(l)) {
+            tracers    (l,hs+k,hs+j,hs+i,iens) = std::max( 0._fp , tracers    (l,hs+k,hs+j,hs+i,iens) );
+          }
+        }
+      });
+    }
+
+    #ifdef PAM_DEBUG
+      auto mass_final = compute_mass( coupler , state , tracers );
+      using yakl::componentwise::operator-;
+      using yakl::componentwise::operator+;
+      using yakl::componentwise::operator/;
+      using yakl::intrinsics::abs;
+      auto abs_mass_diff   = abs(mass_final - mass_init).createHostCopy();
+      auto rel_mass_diff   = ( abs(mass_final - mass_init) / (abs(mass_init) + 1.e-20) ).createHostCopy();
+      auto mass_diff       = (mass_final - mass_init).createHostCopy();
+      auto mass_init_host  = mass_init.createHostCopy();
+      auto mass_final_host = mass_final.createHostCopy();
+      int num_vars = rel_mass_diff.extent(0);
+      for (int ivar=0; ivar < num_vars; ivar++) {
+        for (int iens=0; iens < nens; iens++) {
+          if (rel_mass_diff(ivar,iens) > 1.e-10) {
+            if (abs_mass_diff(ivar,iens) > 1.e-10) {
+              std::cout << "WARNING: conservation violated variable,ensemble,rel_diff,mass_diff,init,final: "
+                        << ivar << " , "
+                        << iens << " , "
+                        << std::scientific << std::setw(10) << rel_mass_diff  (ivar,iens) << " , "
+                        << std::scientific << std::setw(10) << mass_diff      (ivar,iens) << " , " 
+                        << std::scientific << std::setw(10) << mass_init_host (ivar,iens) << " , " 
+                        << std::scientific << std::setw(10) << mass_final_host(ivar,iens) << std::endl;
+            }
+          }
+        }
+      }
+    #endif
+
+    // Convert the dycore's state back to the coupler's state
+    convert_dynamics_to_coupler( coupler , state , tracers );
+  }
+
+
+
+  // Compute the tendencies for state and tracers for one semi-discretized step inside the RK integrator
+  // Tendencies are the time rate of change for a quantity
+  // Coupler is non-const because we are writing to the flux variables
+  void compute_tendencies( pam::PamCoupler &coupler   ,
+                           real5d const &state        ,
+                           real5d const &state_tend   ,
+                           real5d const &tracers      ,
+                           real5d const &tracers_tend ,
+                           real dt                    ) const {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    using std::min;
+    using std::max;
+
+    auto nens         = coupler.get_nens();
+    auto nx           = coupler.get_nx();
+    auto ny           = coupler.get_ny();
+    auto nz           = coupler.get_nz();
+    auto dx           = coupler.get_dx();
+    auto dy           = coupler.get_dy();
+    auto sim2d        = ny == 1;
+    auto C0           = coupler.get_option<real>("C0"     );
+    auto gamma_d      = coupler.get_option<real>("gamma_d");
+    auto grav         = coupler.get_option<real>("grav"   );
+    auto num_tracers  = coupler.get_num_tracers();
+    auto grav_balance = coupler.get_option<bool>("balance_hydrostasis_with_gravity");
+
+    // The store a single values flux at cell edges
+    auto &dm                   = coupler.get_data_manager_device_readwrite();
+    auto tracer_positive       = dm.get<bool const,1>("tracer_positive");
+    auto dz                    = dm.get<real const,2>("vertical_cell_dz");
+    auto vert_weno_recon_lower = dm.get<real const,5>("vert_weno_recon_lower");
+    auto vert_sten_to_coefs    = dm.get<real const,4>("vert_sten_to_coefs");
+    auto hy_dens_cells         = dm.get<real const,2>("hy_dens_cells");
+    auto hy_pressure_cells     = dm.get<real const,2>("hy_pressure_cells");
+    auto grav_var              = dm.get<real const,2>("variable_gravity");
+
+    YAKL_SCOPE( weno_recon_lower , this->weno_recon_lower );
+
+    // Use TransformMatrices class to create matrices & GLL points to convert degrees of freedom as needed
+    SArray<real,2,ord,ord> sten_to_coefs;
+    SArray<real,2,ord,2  > coefs_to_gll;
+    TransformMatrices::coefs_to_gll_lower(coefs_to_gll );
+    TransformMatrices::sten_to_coefs     (sten_to_coefs);
+    SArray<real,1,hs+1> idl;
+    real                sigma;
+    weno::wenoSetIdealSigma<ord>(idl,sigma);
+
+    real4d pressure("pressure",nz+2*hs,ny+2*hs,nx+2*hs,nens);
+
+    // Compute pressure perturbation, density perturbation, and divide density from all other quantities before interpolation
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      if (grav_balance) {
+        pressure (hs+k,hs+j,hs+i,iens) = C0*std::pow(state(idT,hs+k,hs+j,hs+i,iens),gamma_d);
+      } else {
+        pressure (hs+k,hs+j,hs+i,iens) = C0*std::pow(state(idT,hs+k,hs+j,hs+i,iens),gamma_d) - hy_pressure_cells(k,iens);
+      }
+      state(idU,hs+k,hs+j,hs+i,iens) /= state(idR,hs+k,hs+j,hs+i,iens);
+      state(idV,hs+k,hs+j,hs+i,iens) /= state(idR,hs+k,hs+j,hs+i,iens);
+      state(idW,hs+k,hs+j,hs+i,iens) /= state(idR,hs+k,hs+j,hs+i,iens);
+      state(idT,hs+k,hs+j,hs+i,iens) /= state(idR,hs+k,hs+j,hs+i,iens);
+      for (int tr=0; tr < num_tracers; tr++) { tracers(tr,hs+k,hs+j,hs+i,iens) /= state(idR,hs+k,hs+j,hs+i,iens); }
+    });
+
+    halo_exchange( coupler , state , tracers , pressure );
+
+    real5d state_flux_x  ("state_flux_x"  ,num_state  ,nz,ny,nx+1,nens);
+    real5d state_flux_y  ("state_flux_y"  ,num_state  ,nz,ny+1,nx,nens);
+    real5d state_flux_z  ("state_flux_z"  ,num_state  ,nz+1,ny,nx,nens);
+    real5d tracers_flux_x("tracers_flux_x",num_tracers,nz,ny,nx+1,nens);
+    real5d tracers_flux_y("tracers_flux_y",num_tracers,nz,ny+1,nx,nens);
+    real5d tracers_flux_z("tracers_flux_z",num_tracers,nz+1,ny,nx,nens);
+
+    // Compute samples of state and tracers at cell edges using cell-centered reconstructions at high-order with WENO
+    // At the end of this, we will have two samples per cell edge in each dimension, one from each adjacent cell.
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz+1,ny+1,nx+1,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      real constexpr cs = 350;
+      ////////////////////////////////////////////////////////
+      // X-direction
+      ////////////////////////////////////////////////////////
+      if (j < ny && k < nz) {
+        SArray<real,1,ord> stencil;
+        // ACOUSTIC
+        real ru, pp;
+        {
+          // rho*u (left estimate)
+          int i_upw = 0;
+          for (int s=0; s < ord; s++) { stencil(s) = state(idR,hs+k,hs+j,i+i_upw+s,iens)*state(idU,hs+k,hs+j,i+i_upw+s,iens); }
+          real ru_L = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+          // rho*u (right estimate)
+          i_upw = 1;
+          for (int s=0; s < ord; s++) { stencil(s) = state(idR,hs+k,hs+j,i+i_upw+s,iens)*state(idU,hs+k,hs+j,i+i_upw+s,iens); }
+          real ru_R = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+          // pressure perturbation (left estimate)
+          i_upw = 0;
+          for (int s=0; s < ord; s++) { stencil(s) = pressure(hs+k,hs+j,i+i_upw+s,iens); }
+          real pp_L = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+          // pressure perturbation (right estimate)
+          i_upw = 1;
+          for (int s=0; s < ord; s++) { stencil(s) = pressure(hs+k,hs+j,i+i_upw+s,iens); }
+          real pp_R = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+          // Characteristics & upwind values
+          real w1 = 0.5_fp * (pp_R-cs*ru_R);
+          real w2 = 0.5_fp * (pp_L+cs*ru_L);
+          pp = w1+w2;
+          ru = (w2-w1)/cs;
+          state_flux_x(idR,k,j,i,iens) = ru;
+        }
+        // ADVECTIVE
+        int i_upw = ru > 0 ? 0 : 1;
+        // u-velocity
+        for (int s=0; s < ord; s++) { stencil(s) = state(idU,hs+k,hs+j,i+i_upw+s,iens); }
+        state_flux_x(idU,k,j,i,iens) = ru * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw) + pp;
+        // v-velocity
+        for (int s=0; s < ord; s++) { stencil(s) = state(idV,hs+k,hs+j,i+i_upw+s,iens); }
+        state_flux_x(idV,k,j,i,iens) = ru * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+        // w-velocity
+        for (int s=0; s < ord; s++) { stencil(s) = state(idW,hs+k,hs+j,i+i_upw+s,iens); }
+        state_flux_x(idW,k,j,i,iens) = ru * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+        // theta
+        for (int s=0; s < ord; s++) { stencil(s) = state(idT,hs+k,hs+j,i+i_upw+s,iens); }
+        state_flux_x(idT,k,j,i,iens) = ru * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+        // tracers
+        for (int tr=0; tr < num_tracers; tr++) {
+          for (int s=0; s < ord; s++) { stencil(s) = tracers(tr,hs+k,hs+j,i+i_upw+s,iens); }
+          tracers_flux_x(tr,k,j,i,iens) = ru * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-i_upw);
+        }
+      }
+
+      ////////////////////////////////////////////////////////
+      // Y-direction
+      ////////////////////////////////////////////////////////
+      if (i < nx && k < nz) {
+        if (! sim2d) {
+          SArray<real,1,ord> stencil;
+          // ACOUSTIC
+          real rv, pp;
+          {
+            // rho*v (left estimate)
+            int j_upw = 0;
+            for (int s=0; s < ord; s++) { stencil(s) = state(idR,hs+k,j+j_upw+s,hs+i,iens)*state(idV,hs+k,j+j_upw+s,hs+i,iens); }
+            real rv_L = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+            // rho*v (right estimate)
+            j_upw = 1;
+            for (int s=0; s < ord; s++) { stencil(s) = state(idR,hs+k,j+j_upw+s,hs+i,iens)*state(idV,hs+k,j+j_upw+s,hs+i,iens); }
+            real rv_R = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+            // pressure perturbation (left estimate)
+            j_upw = 0;
+            for (int s=0; s < ord; s++) { stencil(s) = pressure(hs+k,j+j_upw+s,hs+i,iens); }
+            real pp_L = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+            // pressure perturbation (right estimate)
+            j_upw = 1;
+            for (int s=0; s < ord; s++) { stencil(s) = pressure(hs+k,j+j_upw+s,hs+i,iens); }
+            real pp_R = reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+            // Characteristics & upwind values
+            real w1 = 0.5_fp * (pp_R-cs*rv_R);
+            real w2 = 0.5_fp * (pp_L+cs*rv_L);
+            pp = w1+w2;
+            rv = (w2-w1)/cs;
+            state_flux_y(idR,k,j,i,iens) = rv;
+          }
+          // ADVECTIVE
+          int j_upw = rv > 0 ? 0 : 1;
+          // u-velocity
+          for (int s=0; s < ord; s++) { stencil(s) = state(idU,hs+k,j+j_upw+s,hs+i,iens); }
+          state_flux_y(idU,k,j,i,iens) = rv * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+          // v-velocity
+          for (int s=0; s < ord; s++) { stencil(s) = state(idV,hs+k,j+j_upw+s,hs+i,iens); }
+          state_flux_y(idV,k,j,i,iens) = rv * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw) + pp;
+          // w-velocity
+          for (int s=0; s < ord; s++) { stencil(s) = state(idW,hs+k,j+j_upw+s,hs+i,iens); }
+          state_flux_y(idW,k,j,i,iens) = rv * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+          // theta
+          for (int s=0; s < ord; s++) { stencil(s) = state(idT,hs+k,j+j_upw+s,hs+i,iens); }
+          state_flux_y(idT,k,j,i,iens) = rv * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+          // tracers
+          for (int tr=0; tr < num_tracers; tr++) {
+            for (int s=0; s < ord; s++) { stencil(s) = tracers(tr,hs+k,j+j_upw+s,hs+i,iens); }
+            tracers_flux_y(tr,k,j,i,iens) = rv * reconstruct(stencil,coefs_to_gll,sten_to_coefs,weno_recon_lower,idl,sigma,1-j_upw);
+          }
+        } else {
+          state_flux_y(idR,k,j,i,iens) = 0;
+          state_flux_y(idU,k,j,i,iens) = 0;
+          state_flux_y(idV,k,j,i,iens) = 0;
+          state_flux_y(idW,k,j,i,iens) = 0;
+          state_flux_y(idT,k,j,i,iens) = 0;
+          for (int tr=0; tr < num_tracers; tr++) { tracers_flux_y(tr,k,j,i,iens) = 0; }
+        }
+      }
+
+      ////////////////////////////////////////////////////////
+      // Z-direction
+      ////////////////////////////////////////////////////////
+      if (i < nx && j < ny) {
+        SArray<real,1,ord> stencil;
+        SArray<real,2,ord,ord>  s2c_loc[2];
+        SArray<real,3,hs,hs,hs> wrl_loc[2];
+        for (int i1=0; i1 < ord; i1++) {
+          for (int i2=0; i2 < ord; i2++) {
+            s2c_loc[0](i1,i2) = vert_sten_to_coefs(k  ,i1,i2,iens);
+            s2c_loc[1](i1,i2) = vert_sten_to_coefs(k+1,i1,i2,iens);
+          }
+        }
+        for (int i1=0; i1 < hs; i1++) {
+          for (int i2=0; i2 < hs; i2++) {
+            for (int i3=0; i3 < hs; i3++) {
+              wrl_loc[0](i1,i2,i3) = vert_weno_recon_lower(k  ,i1,i2,i3,iens);
+              wrl_loc[1](i1,i2,i3) = vert_weno_recon_lower(k+1,i1,i2,i3,iens);
+            }
+          }
+        }
+        // ACOUSTIC
+        real rw, pp;
+        {
+          // rho*w (left estimate)
+          int k_upw = 0;
+          for (int s=0; s < ord; s++) { stencil(s) = state(idR,k+k_upw+s,hs+j,hs+i,iens)*state(idW,k+k_upw+s,hs+j,hs+i,iens); }
+          real rw_L = reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+          if ((k == 0 || k == nz)) rw_L = 0;
+          // rho*w (right estimate)
+          k_upw = 1;
+          for (int s=0; s < ord; s++) { stencil(s) = state(idR,k+k_upw+s,hs+j,hs+i,iens)*state(idW,k+k_upw+s,hs+j,hs+i,iens); }
+          real rw_R = reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+          if ((k == 0 || k == nz)) rw_R = 0;
+          // pressure perturbation (left estimate)
+          k_upw = 0;
+          for (int s=0; s < ord; s++) { stencil(s) = pressure(k+k_upw+s,hs+j,hs+i,iens); }
+          real pp_L = reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+          // pressure perturbation (right estimate)
+          k_upw = 1;
+          for (int s=0; s < ord; s++) { stencil(s) = pressure(k+k_upw+s,hs+j,hs+i,iens); }
+          real pp_R = reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+          // Characteristics & upwind values
+          real w1 = 0.5_fp * (pp_R-cs*rw_R);
+          real w2 = 0.5_fp * (pp_L+cs*rw_L);
+          pp = w1+w2;
+          rw = (w2-w1)/cs;
+          if (k == 0 || k == nz) rw = 0;
+          state_flux_z(idR,k,j,i,iens) = rw;
+        }
+        // ADVECTIVE
+        int k_upw = rw > 0 ? 0 : 1;
+        // u-velocity
+        for (int s=0; s < ord; s++) { stencil(s) = state(idU,k+k_upw+s,hs+j,hs+i,iens); }
+        state_flux_z(idU,k,j,i,iens) = rw * reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+        // v-velocity
+        for (int s=0; s < ord; s++) { stencil(s) = state(idV,k+k_upw+s,hs+j,hs+i,iens); }
+        state_flux_z(idV,k,j,i,iens) = rw * reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+        // w-velocity
+        for (int s=0; s < ord; s++) { stencil(s) = state(idW,k+k_upw+s,hs+j,hs+i,iens); }
+        state_flux_z(idW,k,j,i,iens) = rw * reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw) + pp;
+        // theta
+        for (int s=0; s < ord; s++) { stencil(s) = state(idT,k+k_upw+s,hs+j,hs+i,iens); }
+        state_flux_z(idT,k,j,i,iens) = rw * reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+        // tracers
+        for (int tr=0; tr < num_tracers; tr++) {
+          for (int s=0; s < ord; s++) { stencil(s) = tracers(tr,k+k_upw+s,hs+j,hs+i,iens); }
+          tracers_flux_z(tr,k,j,i,iens) = rw * reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+        }
+      }
+    });
+
+    // Flux Corrected Transport to enforce positivity for tracer species that must remain non-negative
+    // This looks like it has a race condition, but it does not. Only one of the adjacent cells can ever change
+    // a given edge flux because it's only changed if its sign oriented outward from a cell.
+    // Also, multiply density back onto the state and tracers
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) ,
+                                      YAKL_LAMBDA (int k, int j, int i, int iens) {
+      state(idU,hs+k,hs+j,hs+i,iens) *= state(idR,hs+k,hs+j,hs+i,iens);
+      state(idV,hs+k,hs+j,hs+i,iens) *= state(idR,hs+k,hs+j,hs+i,iens);
+      state(idW,hs+k,hs+j,hs+i,iens) *= state(idR,hs+k,hs+j,hs+i,iens);
+      state(idT,hs+k,hs+j,hs+i,iens) *= state(idR,hs+k,hs+j,hs+i,iens);
+      for (int tr=0; tr < num_tracers; tr++) {
+        tracers(tr,hs+k,hs+j,hs+i,iens) *= state(idR,hs+k,hs+j,hs+i,iens);
+        if (tracer_positive(tr)) {
+          real mass_available = max(tracers_tend(tr,k,j,i,iens),0._fp) * dx * dy * dz(k,iens);
+          real flux_out_x = ( max(tracers_flux_x(tr,k,j,i+1,iens),0._fp) - min(tracers_flux_x(tr,k,j,i,iens),0._fp) ) / dx;
+          real flux_out_y = ( max(tracers_flux_y(tr,k,j+1,i,iens),0._fp) - min(tracers_flux_y(tr,k,j,i,iens),0._fp) ) / dy;
+          real flux_out_z = ( max(tracers_flux_z(tr,k+1,j,i,iens),0._fp) - min(tracers_flux_z(tr,k,j,i,iens),0._fp) ) / dz(k,iens);
+          real mass_out = (flux_out_x + flux_out_y + flux_out_z) * dt * dx * dy * dz(k,iens);
+          if (mass_out > mass_available) {
+            real mult = mass_available / mass_out;
+            if (tracers_flux_x(tr,k,j,i+1,iens) > 0) tracers_flux_x(tr,k,j,i+1,iens) *= mult;
+            if (tracers_flux_x(tr,k,j,i  ,iens) < 0) tracers_flux_x(tr,k,j,i  ,iens) *= mult;
+            if (tracers_flux_y(tr,k,j+1,i,iens) > 0) tracers_flux_y(tr,k,j+1,i,iens) *= mult;
+            if (tracers_flux_y(tr,k,j  ,i,iens) < 0) tracers_flux_y(tr,k,j  ,i,iens) *= mult;
+            if (tracers_flux_z(tr,k+1,j,i,iens) > 0) tracers_flux_z(tr,k+1,j,i,iens) *= mult;
+            if (tracers_flux_z(tr,k  ,j,i,iens) < 0) tracers_flux_z(tr,k  ,j,i,iens) *= mult;
+          }
+        }
+      }
+    });
+
+    // Compute tendencies as the flux divergence + gravity source term
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      for (int l = 0; l < num_state; l++) {
+        state_tend  (l,k,j,i,iens) = -( state_flux_x(l,k  ,j  ,i+1,iens) - state_flux_x(l,k,j,i,iens) ) / dx
+                                     -( state_flux_y(l,k  ,j+1,i  ,iens) - state_flux_y(l,k,j,i,iens) ) / dy
+                                     -( state_flux_z(l,k+1,j  ,i  ,iens) - state_flux_z(l,k,j,i,iens) ) / dz(k,iens);
+        if (l == idW) {
+          if (grav_balance) {
+            state_tend(l,k,j,i,iens) += -grav_var(k,iens) * state(idR,hs+k,hs+j,hs+i,iens);
+          } else {
+            state_tend(l,k,j,i,iens) += -grav * ( state(idR,hs+k,hs+j,hs+i,iens) - hy_dens_cells(k,iens) );
+          }
+        }
+        if (l == idV && sim2d) state_tend(l,k,j,i,iens) = 0;
+      }
+      for (int l = 0; l < num_tracers; l++) {
+        real fx   = tracers_flux_x(l,k  ,j  ,i  ,iens);
+        real fxp1 = tracers_flux_x(l,k  ,j  ,i+1,iens);
+        real fy   = tracers_flux_y(l,k  ,j  ,i  ,iens);
+        real fyp1 = tracers_flux_y(l,k  ,j+1,i  ,iens);
+        real fz   = tracers_flux_z(l,k  ,j  ,i  ,iens);
+        real fzp1 = tracers_flux_z(l,k+1,j  ,i  ,iens);
+        if (i == 0   ) {
+          fx = std::min( fx , tracers_flux_x(l,k,j,nx,iens) );
+        }
+        if (i == nx-1) { fxp1 = std::min( fxp1 , tracers_flux_x(l,k,j,0 ,iens) ); }
+        if (j == 0   ) { fy   = std::min( fy   , tracers_flux_y(l,k,ny,i,iens) ); }
+        if (j == ny-1) { fyp1 = std::min( fyp1 , tracers_flux_y(l,k,0 ,i,iens) ); }
+        tracers_tend(l,k,j,i,iens) = -( fxp1 - fx ) / dx
+                                     -( fyp1 - fy ) / dy
+                                     -( fzp1 - fz ) / dz(k,iens);
+      }
+    });
+
+  }
+
+
+
+  // ord stencil cell averages to two GLL point values via high-order reconstruction and WENO limiting
+  YAKL_INLINE static real reconstruct( SArray<real,1,ord>      const &stencil          ,
+                                       SArray<real,2,ord,2>    const &coefs_to_gll     ,
+                                       SArray<real,2,ord,ord>  const &sten_to_coefs    ,
+                                       SArray<real,3,hs,hs,hs> const &weno_recon_lower ,
+                                       SArray<real,1,hs+1>     const &idl              ,
+                                       real                          sigma             ,
+                                       int                           ind               ) {
+    // Reconstruct values
+    SArray<real,1,ord> wenoCoefs;
+    weno::compute_weno_coefs<ord>( weno_recon_lower , sten_to_coefs , stencil , wenoCoefs , idl , sigma );
+    real tmp = 0;
+    for (int s=0; s < ord; s++) { tmp += coefs_to_gll(s,ind) * wenoCoefs(s); }
+    return tmp;
+  }
+
+
+
+  void halo_exchange( pam::PamCoupler const &coupler  ,
+                      real5d          const &state    ,
+                      real5d          const &tracers  ,
+                      real4d          const &pressure ) const {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+
+    auto nens         = coupler.get_nens();
+    auto nx           = coupler.get_nx();
+    auto ny           = coupler.get_ny();
+    auto nz           = coupler.get_nz();
+    auto num_tracers  = coupler.get_num_tracers();
+    auto sim2d        = ny == 1;
+    auto gamma        = coupler.get_option<real>("gamma_d");
+    auto C0           = coupler.get_option<real>("C0");
+    auto grav         = coupler.get_option<real>("grav");
+    auto grav_balance = coupler.get_option<bool>("balance_hydrostasis_with_gravity");
+    auto dz = coupler.get_data_manager_device_readonly().get<real const,2>("vertical_cell_dz");
+
+    int npack = num_state + num_tracers + 1;
+
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<5>(npack,nz,ny,hs,nens) ,
+                                      YAKL_LAMBDA (int v, int k, int j, int ii, int iens) {
+      if (v < num_state) {
+        state  (v          ,hs+k,hs+j,nx+hs+ii,iens) = state  (v          ,hs+k,hs+j,hs+ii,iens);
+        state  (v          ,hs+k,hs+j,      ii,iens) = state  (v          ,hs+k,hs+j,nx+ii,iens);
+      } else if (v < num_state + num_tracers) {
+        tracers(v-num_state,hs+k,hs+j,nx+hs+ii,iens) = tracers(v-num_state,hs+k,hs+j,hs+ii,iens);
+        tracers(v-num_state,hs+k,hs+j,      ii,iens) = tracers(v-num_state,hs+k,hs+j,nx+ii,iens);
+      } else {
+        pressure(hs+k,hs+j,nx+hs+ii,iens) = pressure(hs+k,hs+j,hs+ii,iens);
+        pressure(hs+k,hs+j,      ii,iens) = pressure(hs+k,hs+j,nx+ii,iens);
+      }
+    });
+
+    if (!sim2d) {
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<5>(npack,nz,hs,nx,nens) ,
+                                        YAKL_LAMBDA (int v, int k, int jj, int i, int iens) {
+        if (v < num_state) {
+          state  (v          ,hs+k,ny+hs+jj,hs+i,iens) = state  (v          ,hs+k,hs+jj,hs+i,iens);
+          state  (v          ,hs+k,      jj,hs+i,iens) = state  (v          ,hs+k,ny+jj,hs+i,iens);
+        } else if (v < num_state + num_tracers) {
+          tracers(v-num_state,hs+k,ny+hs+jj,hs+i,iens) = tracers(v-num_state,hs+k,hs+jj,hs+i,iens);
+          tracers(v-num_state,hs+k,      jj,hs+i,iens) = tracers(v-num_state,hs+k,ny+jj,hs+i,iens);
+        } else {
+          pressure(hs+k,ny+hs+jj,hs+i,iens) = pressure(hs+k,hs+jj,hs+i,iens);
+          pressure(hs+k,      jj,hs+i,iens) = pressure(hs+k,ny+jj,hs+i,iens);
+        }
+      });
+    }
+
+    ////////////////////////////////////
+    // Begin boundary conditions
+    ////////////////////////////////////
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(hs,ny,nx,nens) ,
+                                      YAKL_LAMBDA (int kk, int j, int i, int iens) {
+      // Exclude density (idR == 0) because that's calculated lower down
+      for (int l=1; l < num_state; l++) {
+        if (l == idW) {
+          state(l,      kk,hs+j,hs+i,iens) = 0;
+          state(l,hs+nz+kk,hs+j,hs+i,iens) = 0;
+        } else {
+          state(l,      kk,hs+j,hs+i,iens) = state(l,hs+0   ,hs+j,hs+i,iens);
+          state(l,hs+nz+kk,hs+j,hs+i,iens) = state(l,hs+nz-1,hs+j,hs+i,iens);
+        }
+      }
+      for (int l=0; l < num_tracers; l++) {
+        tracers(l,      kk,hs+j,hs+i,iens) = tracers(l,hs+0   ,hs+j,hs+i,iens);
+        tracers(l,hs+nz+kk,hs+j,hs+i,iens) = tracers(l,hs+nz-1,hs+j,hs+i,iens);
+      }
+      if (! grav_balance) {
+        pressure(      kk,hs+j,hs+i,iens) = pressure(hs+0   ,hs+j,hs+i,iens);
+        pressure(hs+nz+kk,hs+j,hs+i,iens) = pressure(hs+nz-1,hs+j,hs+i,iens);
+      }
+      {
+        int  k0       = hs;
+        int  k        = k0-1-kk;
+        real rho0     = state(idR,k0,hs+j,hs+i,iens);
+        real theta0   = state(idT,k0,hs+j,hs+i,iens);
+        real rho0_gm1 = std::pow(rho0  ,gamma-1);
+        real theta0_g = std::pow(theta0,gamma  );
+        state(idR,k,hs+j,hs+i,iens) = std::pow( rho0_gm1 + grav*(gamma-1)*dz(k0-hs,iens)*(kk+1)/(gamma*C0*theta0_g) ,
+                                                1._fp/(gamma-1) );
+        if (grav_balance) {
+          real rt = state(idR,k,hs+j,hs+i,iens) * state(idT,k,hs+j,hs+i,iens);
+          pressure(k,hs+j,hs+i,iens) = C0*std::pow( rt , gamma );
+        }
+      }
+      {
+        int  k0       = hs+nz-1;
+        int  k        = k0+1+kk;
+        real rho0     = state(idR,k0,hs+j,hs+i,iens);
+        real theta0   = state(idT,k0,hs+j,hs+i,iens);
+        real rho0_gm1 = std::pow(rho0  ,gamma-1);
+        real theta0_g = std::pow(theta0,gamma  );
+        state(idR,k,hs+j,hs+i,iens) = std::pow( rho0_gm1 - grav*(gamma-1)*dz(k0-hs,iens)*(kk+1)/(gamma*C0*theta0_g) ,
+                                                1._fp/(gamma-1) );
+        if (grav_balance) {
+          real rt = state(idR,k,hs+j,hs+i,iens) * state(idT,k,hs+j,hs+i,iens);
+          pressure(k,hs+j,hs+i,iens) = C0*std::pow( rt , gamma );
+        }
+      }
+    });
+  }
+
+
+
+  // Creates initial data at a point in space for the rising moist thermal test case
+  YAKL_INLINE static void thermal(real x, real y, real z, real xlen, real ylen, real grav, real C0, real gamma,
+                                  real cp, real p0, real R_d, real R_v, real &rho, real &u, real &v, real &w,
+                                  real &theta, real &rho_v, real &hr, real &ht) {
+    hydro_const_theta(z,grav,C0,cp,p0,gamma,R_d,hr,ht);
+    real rho_d   = hr;
+    u            = 0.;
+    v            = 0.;
+    w            = 0.;
+    real theta_d = ht + sample_ellipse_cosine(2._fp  ,  x,y,z  ,  xlen/2,ylen/2,2000.  ,  2000.,2000.,2000.);
+    real p_d     = C0 * pow( rho_d*theta_d , gamma );
+    real temp    = p_d / rho_d / R_d;
+    real sat_pv  = saturation_vapor_pressure(temp);
+    real sat_rv  = sat_pv / R_v / temp;
+    rho_v        = sample_ellipse_cosine(0.8_fp  ,  x,y,z  ,  xlen/2,ylen/2,2000.  ,  2000.,2000.,2000.) * sat_rv;
+    real p       = rho_d * R_d * temp + rho_v * R_v * temp;
+    rho          = rho_d + rho_v;
+    theta        = std::pow( p / C0 , 1._fp / gamma ) / rho;
+  }
+
+
+
+  // Computes a hydrostatic background density and potential temperature using c constant potential temperature
+  // backgrounda for a single vertical location
+  YAKL_INLINE static void hydro_const_theta( real z, real grav, real C0, real cp, real p0, real gamma, real rd,
+                                             real &r, real &t ) {
+    const real theta0 = 300.;  //Background potential temperature
+    const real exner0 = 1.;    //Surface-level Exner pressure
+    t = theta0;                                       //Potential Temperature at z
+    real exner = exner0 - grav * z / (cp * theta0);   //Exner pressure at z
+    real p = p0 * std::pow(exner,(cp/rd));            //Pressure at z
+    real rt = std::pow((p / C0),(1._fp / gamma));     //rho*theta at z
+    r = rt / t;                                       //Density at z
+  }
+
+
+
+  // Samples a 3-D ellipsoid at a point in space
+  YAKL_INLINE static real sample_ellipse_cosine(real amp, real x   , real y   , real z   ,
+                                                          real x0  , real y0  , real z0  ,
+                                                          real xrad, real yrad, real zrad) {
+    //Compute distance from bubble center
+    real dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) +
+                      ((y-y0)/yrad)*((y-y0)/yrad) +
+                      ((z-z0)/zrad)*((z-z0)/zrad) ) * M_PI / 2.;
+    //If the distance from bubble center is less than the radius, create a cos**2 profile
+    if (dist <= M_PI / 2.) {
+      return amp * std::pow(cos(dist),2._fp);
+    } else {
+      return 0.;
+    }
+  }
+
+
+
+  YAKL_INLINE static real saturation_vapor_pressure(real temp) {
+    real tc = temp - 273.15;
+    return 610.94 * std::exp( 17.625*tc / (243.04+tc) );
+  }
+
+
+
+  // Compute supercell temperature profile at a vertical location
+  YAKL_INLINE static real init_supercell_temperature(real z, real z_0, real z_trop, real z_top,
+                                                     real T_0, real T_trop, real T_top) {
+    if (z <= z_trop) {
+      real lapse = - (T_trop - T_0) / (z_trop - z_0);
+      return T_0 - lapse * (z - z_0);
+    } else {
+      real lapse = - (T_top - T_trop) / (z_top - z_trop);
+      return T_trop - lapse * (z - z_trop);
+    }
+  }
+
+
+
+  // Compute supercell dry pressure profile at a vertical location
+  YAKL_INLINE static real init_supercell_pressure_dry(real z, real z_0, real z_trop, real z_top,
+                                                      real T_0, real T_trop, real T_top,
+                                                      real p_0, real R_d, real grav) {
+    if (z <= z_trop) {
+      real lapse = - (T_trop - T_0) / (z_trop - z_0);
+      real T = init_supercell_temperature(z, z_0, z_trop, z_top, T_0, T_trop, T_top);
+      return p_0 * pow( T / T_0 , grav/(R_d*lapse) );
+    } else {
+      // Get pressure at the tropopause
+      real lapse = - (T_trop - T_0) / (z_trop - z_0);
+      real p_trop = p_0 * pow( T_trop / T_0 , grav/(R_d*lapse) );
+      // Get pressure at requested height
+      lapse = - (T_top - T_trop) / (z_top - z_trop);
+      if (lapse != 0) {
+        real T = init_supercell_temperature(z, z_0, z_trop, z_top, T_0, T_trop, T_top);
+        return p_trop * pow( T / T_trop , grav/(R_d*lapse) );
+      } else {
+        return p_trop * exp(-grav*(z-z_trop)/(R_d*T_trop));
+      }
+    }
+  }
+
+
+  
+  // Compute supercell relative humidity profile at a vertical location
+  YAKL_INLINE static real init_supercell_relhum(real z, real z_0, real z_trop) {
+    if (z <= z_trop) {
+      return 1._fp - 0.75_fp * pow(z / z_trop , 1.25_fp );
+    } else {
+      return 0.25_fp;
+    }
+  }
+
+
+
+  // Computes dry saturation mixing ratio
+  YAKL_INLINE static real init_supercell_sat_mix_dry( real press , real T ) {
+    return 380/(press) * exp( 17.27_fp * (T-273)/(T-36) );
+  }
+
+
+
+  // Initialize the class data as well as the state and tracers arrays and convert them back into the coupler state
+  void init(pam::PamCoupler &coupler, bool verbose=false) {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+
+    auto nens        = coupler.get_nens();
+    auto nx          = coupler.get_nx();
+    auto ny          = coupler.get_ny();
+    auto nz          = coupler.get_nz();
+    auto dx          = coupler.get_dx();
+    auto dy          = coupler.get_dy();
+    auto xlen        = coupler.get_xlen();
+    auto ylen        = coupler.get_ylen();
+    auto sim2d       = ny == 1;
+    auto num_tracers = coupler.get_num_tracers();
+
+
+    int nbands = 5;
+    int n      = 5;
+    real3d diags("diags",nbands,n,nens);  diags = 0;
+    real2d rhs  ("rhs"         ,n,nens);
+    parallel_for( YAKL_AUTO_LABEL() , n , YAKL_LAMBDA (int i) {
+      if (i > 1  ) diags(0,i,0) = 0.5;
+      if (i > 0  ) diags(1,i,0) =  -1;
+                   diags(2,i,0) =   2;
+      if (i < n-1) diags(3,i,0) =  -1;
+      if (i < n-2) diags(4,i,0) = 0.5;
+      rhs  (  i,0) = i == n/2 ? 1 : 0;
+    });
+    solve_banded( diags , rhs );
+    std::cout << rhs;
+
+    coupler.set_option<bool>("balance_hydrostasis_with_gravity",true);
+    if (coupler.get_option<bool>("balance_hydrostasis_with_gravity")) {
+      coupler.get_data_manager_device_readwrite().register_and_allocate<real>("variable_gravity","",{nz,nens});
+    }
+
+    if (! coupler.option_exists("R_d"     )) coupler.set_option<real>("R_d"     ,287.       );
+    if (! coupler.option_exists("cp_d"    )) coupler.set_option<real>("cp_d"    ,1003.      );
+    if (! coupler.option_exists("R_v"     )) coupler.set_option<real>("R_v"     ,461.       );
+    if (! coupler.option_exists("cp_v"    )) coupler.set_option<real>("cp_v"    ,1859       );
+    if (! coupler.option_exists("p0"      )) coupler.set_option<real>("p0"      ,1.e5       );
+    if (! coupler.option_exists("grav"    )) coupler.set_option<real>("grav"    ,9.81       );
+    auto R_d  = coupler.get_option<real>("R_d" );
+    auto cp_d = coupler.get_option<real>("cp_d");
+    auto R_v  = coupler.get_option<real>("R_v" );
+    auto cp_v = coupler.get_option<real>("cp_v");
+    auto p0   = coupler.get_option<real>("p0"  );
+    auto grav = coupler.get_option<real>("grav");
+    if (! coupler.option_exists("cv_d"   )) coupler.set_option<real>("cv_d"   ,cp_d - R_d );
+    auto cv_d = coupler.get_option<real>("cv_d");
+    if (! coupler.option_exists("gamma_d")) coupler.set_option<real>("gamma_d",cp_d / cv_d);
+    if (! coupler.option_exists("kappa_d")) coupler.set_option<real>("kappa_d",R_d  / cp_d);
+    if (! coupler.option_exists("cv_v"   )) coupler.set_option<real>("cv_v"   ,R_v - cp_v );
+    auto gamma = coupler.get_option<real>("gamma_d");
+    auto kappa = coupler.get_option<real>("kappa_d");
+    if (! coupler.option_exists("C0")) coupler.set_option<real>("C0" , pow( R_d * pow( p0 , -kappa ) , gamma ));
+    auto C0    = coupler.get_option<real>("C0");
+
+    auto &dm = coupler.get_data_manager_device_readwrite();
+    auto dz = dm.get<real const,2>("vertical_cell_dz");
+
+    // Create WENO reconstruction matrices
+    dm.register_and_allocate<real>("vert_weno_recon_lower","",{nz+2,hs,hs,hs,nens});
+    dm.register_and_allocate<real>("vert_sten_to_coefs"   ,"",{nz+2,ord,ord ,nens});
+    auto vert_weno_recon_lower = dm.get<real,5>("vert_weno_recon_lower");
+    auto vert_sten_to_coefs    = dm.get<real,4>("vert_sten_to_coefs"   );
+    auto dz_host        = dz                   .createHostCopy();
+    auto vert_weno_host = vert_weno_recon_lower.createHostCopy();
+    auto vert_s2c_host  = vert_sten_to_coefs   .createHostCopy();
+    for (int k=0; k < nz+2; k++) {
+      for (int iens = 0; iens < nens; iens++) {
+        // Load normalize grid spacings
+        SArray<real,1,ord> dzloc;
+        for (int kk=0; kk < ord; kk++) {
+          int ind1 = std::min(nz-1,std::max(0,-1+k+kk));
+          int ind2 = std::min(nz-1,std::max(0,-1+k   ));
+          dzloc(kk) = dz_host(ind1,iens) / dz_host(ind2,iens);
+        }
+        // Compute normalized locations of cell edges
+        SArray<real,1,ord+1> locs;
+        locs(0) = 0;
+        for (int kk=1; kk < ord+1; kk++) { locs(kk) = locs(kk-1) + dzloc(kk-1); }
+        real midloc = ( locs((ord-1)/2) + locs((ord+1)/2) ) / 2;
+        for (int kk=0; kk < ord+1; kk++) { locs(kk) = locs(kk) - midloc; }
+        // Compute WENO reconstruction matrices for each level
+        SArray<double,2,ord,ord>  s2c_var;
+        SArray<double,3,hs,hs,hs> weno_recon_lower_var;
+        TransformMatrices_variable::sten_to_coefs_variable  <ord>(locs,s2c_var             );
+        TransformMatrices_variable::weno_lower_sten_to_coefs<ord>(locs,weno_recon_lower_var);
+        for (int jj=0; jj < ord; jj++) {
+          for (int ii=0; ii < ord; ii++) {
+            vert_s2c_host(k,jj,ii,iens) = s2c_var(jj,ii);
+          }
+        }
+        for (int kk=0; kk < hs; kk++) {
+          for (int jj=0; jj < hs; jj++) {
+            for (int ii=0; ii < hs; ii++) {
+              vert_weno_host(k,kk,jj,ii,iens) = weno_recon_lower_var(kk,jj,ii);
+            }
+          }
+        }
+      }
+    }
+    vert_s2c_host .deep_copy_to(vert_sten_to_coefs   );
+    vert_weno_host.deep_copy_to(vert_weno_recon_lower);
+    TransformMatrices::weno_lower_sten_to_coefs(this->weno_recon_lower);
+
+    R_d   = coupler.get_option<real>("R_d"    );
+    R_v   = coupler.get_option<real>("R_v"    );
+    cp_d  = coupler.get_option<real>("cp_d"   );
+    cp_v  = coupler.get_option<real>("cp_v"   );
+    p0    = coupler.get_option<real>("p0"     );
+    grav  = coupler.get_option<real>("grav"   );
+    kappa = coupler.get_option<real>("kappa_d");
+    gamma = coupler.get_option<real>("gamma_d");
+    C0    = coupler.get_option<real>("C0"     );
+
+    // Create arrays to determine whether we should add mass for a tracer or whether it should remain non-negative
+    num_tracers = coupler.get_num_tracers();
+    bool1d tracer_adds_mass("tracer_adds_mass",num_tracers);
+    bool1d tracer_positive ("tracer_positive" ,num_tracers);
+
+    // Must assign on the host to avoid segfaults
+    auto tracer_adds_mass_host = tracer_adds_mass.createHostCopy();
+    auto tracer_positive_host  = tracer_positive .createHostCopy();
+
+    auto tracer_names = coupler.get_tracer_names();  // Get a list of tracer names
+    int idWV;
+    for (int tr=0; tr < num_tracers; tr++) {
+      std::string tracer_desc;
+      bool        tracer_found, positive, adds_mass;
+      coupler.get_tracer_info( tracer_names[tr] , tracer_desc, tracer_found , positive , adds_mass);
+      tracer_positive_host (tr) = positive;
+      tracer_adds_mass_host(tr) = adds_mass;
+      if (tracer_names[tr] == "water_vapor") idWV = tr;  // Be sure to track which index belongs to water vapor
+    }
+    tracer_positive_host .deep_copy_to(tracer_positive );
+    tracer_adds_mass_host.deep_copy_to(tracer_adds_mass);
+
+    coupler.set_option<int>("idWV",idWV);
+    dm.register_and_allocate<bool>("tracer_adds_mass","",{num_tracers});
+    auto dm_tracer_adds_mass = dm.get<bool,1>("tracer_adds_mass");
+    tracer_adds_mass.deep_copy_to(dm_tracer_adds_mass);
+
+    dm.register_and_allocate<bool>("tracer_positive","",{num_tracers});
+    auto dm_tracer_positive = dm.get<bool,1>("tracer_positive");
+    tracer_positive.deep_copy_to(dm_tracer_positive);
+
+    dm.register_and_allocate<real>("hy_dens_cells"    ,"",{nz,nens});
+    dm.register_and_allocate<real>("hy_pressure_cells","",{nz,nens});
+
+    int static constexpr DATA_SPEC_THERMAL   = 0;
+    int static constexpr DATA_SPEC_SUPERCELL = 1;
+    int static constexpr DATA_SPEC_EXTERNAL  = 2;
+    int data_spec;
+    if (coupler.option_exists("standalone_input_file")) {
+      #ifdef PAM_STANDALONE
+        std::string inFile = coupler.get_option<std::string>( "standalone_input_file" );
+        YAML::Node config = YAML::LoadFile(inFile);
+        std::string dataStr = config["initData"].as<std::string>();
+        if        (dataStr == "thermal") {
+          data_spec = DATA_SPEC_THERMAL;
+        } else if (dataStr == "supercell") {
+          data_spec = DATA_SPEC_SUPERCELL;
+        } else if (dataStr == "external") {
+          data_spec = DATA_SPEC_EXTERNAL;
+        } else {
+          endrun("ERROR: Invalid data_spec");
+        }
+      #endif
+    } else {
+      data_spec = DATA_SPEC_EXTERNAL;
+    }
+
+    if (data_spec == DATA_SPEC_EXTERNAL) {
+
+    } else {
+      real5d state  ("state"  ,num_state  ,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+      real5d tracers("tracers",num_tracers,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+
+      auto zmid = dm.get<real const,2>("vertical_midpoint_height");
+
+      if (data_spec == DATA_SPEC_SUPERCELL) {
+
+        init_supercell( coupler , state , tracers );
+
+      } else if (data_spec == DATA_SPEC_THERMAL) {
+
+        real2d hy_dens_cells    ("hy_dens_cells"    ,nz,nens);
+        real2d hy_pressure_cells("hy_pressure_cells",nz,nens);
+
+        // Define quadrature weights and points for 3-point rules
+        const int nqpoints = 9;
+        SArray<real,1,nqpoints> qpoints;
+        SArray<real,1,nqpoints> qweights;
+
+        TransformMatrices::get_gll_points ( qpoints  );
+        TransformMatrices::get_gll_weights( qweights );
+
+        // Compute hydrostatic background cell averages using quadrature
+        parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
+          hy_dens_cells    (k,iens) = 0.;
+          hy_pressure_cells(k,iens) = 0.;
+          for (int kk=0; kk<nqpoints; kk++) {
+            real z = zmid(k,iens) + qpoints(kk)*dz(k,iens);
+            real hr, ht;
+
+            if (data_spec == DATA_SPEC_THERMAL) { hydro_const_theta(z,grav,C0,cp_d,p0,gamma,R_d,hr,ht); }
+
+            hy_dens_cells    (k,iens) += hr                       * qweights(kk);
+            hy_pressure_cells(k,iens) += C0*std::pow(hr*ht,gamma) * qweights(kk);
+          }
+        });
+
+        // Use quadrature to initialize state and tracer data
+        parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+          for (int l=0; l < num_state  ; l++) { state  (l,hs+k,hs+j,hs+i,iens) = 0.; }
+          for (int l=0; l < num_tracers; l++) { tracers(l,hs+k,hs+j,hs+i,iens) = 0.; }
+          //Use Gauss-Legendre quadrature
+          for (int kk=0; kk<nqpoints; kk++) {
+            for (int jj=0; jj<nqpoints; jj++) {
+              for (int ii=0; ii<nqpoints; ii++) {
+                real x = (i+0.5)*dx   + qpoints(ii)*dx;
+                real y = (j+0.5)*dy   + qpoints(jj)*dy;   if (sim2d) y = ylen/2;
+                real z = zmid(k,iens) + qpoints(kk)*dz(k,iens);
+
+                real hr = hy_dens_cells    (k,iens);
+                real hp = hy_pressure_cells(k,iens);
+                real ht = std::pow( hp/C0 , 1._fp/gamma ) / hr;
+                real rho = hr;
+                real u = 0;
+                real v = 0;
+                real w = 0;
+                real rho_v = 0;
+                real theta = ht + sample_ellipse_cosine(2._fp , x,y,z , xlen/2,ylen/2,2000. , 2000.,2000.,2000.);
+
+                if (sim2d) v = 0;
+
+                real wt = qweights(ii)*qweights(jj)*qweights(kk);
+                state(idR,hs+k,hs+j,hs+i,iens) += rho       * wt;
+                state(idU,hs+k,hs+j,hs+i,iens) += rho*u     * wt;
+                state(idV,hs+k,hs+j,hs+i,iens) += rho*v     * wt;
+                state(idW,hs+k,hs+j,hs+i,iens) += rho*w     * wt;
+                state(idT,hs+k,hs+j,hs+i,iens) += rho*theta * wt;
+                for (int tr=0; tr < num_tracers; tr++) {
+                  if (tr == idWV) { tracers(tr,hs+k,hs+j,hs+i,iens) += rho_v * wt; }
+                  else            { tracers(tr,hs+k,hs+j,hs+i,iens) += 0     * wt; }
+                }
+              }
+            }
+          }
+        });
+
+      }
+      convert_dynamics_to_coupler( coupler , state , tracers );
+    }
+  }
+
+
+
+  // Initialize the supercell test case
+  void init_supercell( pam::PamCoupler &coupler , real5d &state , real5d &tracers ) {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    real constexpr z_0    = 0;
+    real constexpr z_trop = 12000;
+    real constexpr T_0    = 300;
+    real constexpr T_trop = 213;
+    real constexpr T_top  = 213;
+    real constexpr p_0    = 100000;
+
+    int constexpr ngll = 9;
+    yakl::SArray<real,1,ngll> gll_pts, gll_wts;
+    TransformMatrices::get_gll_points (gll_pts);
+    TransformMatrices::get_gll_weights(gll_wts);
+
+    auto nens        = coupler.get_nens();
+    auto nx          = coupler.get_nx();
+    auto ny          = coupler.get_ny();
+    auto nz          = coupler.get_nz();
+    auto dx          = coupler.get_dx();
+    auto dy          = coupler.get_dy();
+    auto xlen        = coupler.get_xlen();
+    auto ylen        = coupler.get_ylen();
+    auto sim2d       = ny == 1;
+    auto R_d         = coupler.get_option<real>("R_d"    );
+    auto R_v         = coupler.get_option<real>("R_v"    );
+    auto grav        = coupler.get_option<real>("grav"   );
+    auto gamma       = coupler.get_option<real>("gamma_d");
+    auto C0          = coupler.get_option<real>("C0"     );
+    auto idWV        = coupler.get_option<int >("idWV");
+    auto num_tracers = coupler.get_num_tracers();
+
+    real2d hy_dens_cells    ("hy_dens_cells"    ,nz,nens);
+    real2d hy_pressure_cells("hy_pressure_cells",nz,nens);
+
+    auto &dm  = coupler.get_data_manager_device_readwrite();
+    auto dz   = dm.get<real const,2>("vertical_cell_dz");
+    auto zmid = dm.get<real const,2>("vertical_midpoint_height");
+    auto zint = dm.get<real const,2>("vertical_interface_height");
+
+    // Temporary arrays used to compute the initial state for high-CAPE supercell conditions
+    real4d quad_temp       ("quad_temp"       ,nz,ngll-1,ngll,nens);
+    real3d hyDensGLL       ("hyDensGLL"       ,nz,ngll,nens);
+    real3d hyDensThetaGLL  ("hyDensThetaGLL"  ,nz,ngll,nens);
+    real3d hyDensVapGLL    ("hyDensVapGLL"    ,nz,ngll,nens);
+    real3d hyPressureGLL   ("hyPressureGLL"   ,nz,ngll,nens);
+    real2d hyDensCells     ("hyDensCells"     ,nz,nens);
+    real2d hyDensThetaCells("hyDensThetaCells",nz,nens);
+
+    // Compute quadrature term to integrate to get pressure at GLL points
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ngll-1,ngll,nens) ,
+                  YAKL_LAMBDA (int k, int kk, int kkk, int iens) {
+      // Middle of this cell
+      real cellmid   = zmid(k,iens);
+      // Bottom, top, and middle of the space between these two ngll GLL points
+      real ngll_b    = cellmid + gll_pts(kk  )*dz(k,iens);
+      real ngll_t    = cellmid + gll_pts(kk+1)*dz(k,iens);
+      real ngll_m    = 0.5_fp * (ngll_b + ngll_t);
+      // Compute grid spacing between these ngll GLL points
+      real ngll_dz   = dz(k,iens) * ( gll_pts(kk+1) - gll_pts(kk) );
+      // Compute the locate of this GLL point within the ngll GLL points
+      real zloc      = ngll_m + ngll_dz * gll_pts(kkk);
+      // Compute full density at this location
+      real temp      = init_supercell_temperature (zloc, z_0, z_trop, zint(nz,iens), T_0, T_trop, T_top);
+      real press_dry = init_supercell_pressure_dry(zloc, z_0, z_trop, zint(nz,iens), T_0, T_trop, T_top, p_0, R_d, grav);
+      real qvs       = init_supercell_sat_mix_dry(press_dry, temp);
+      real relhum    = init_supercell_relhum(zloc, z_0, z_trop);
+      if (relhum * qvs > 0.014_fp) relhum = 0.014_fp / qvs;
+      real qv        = std::min( 0.014_fp , qvs*relhum );
+      quad_temp(k,kk,kkk,iens) = -(1+qv)*grav/(R_d+qv*R_v)/temp;
+    });
+
+    // Compute pressure at GLL points
+    parallel_for( YAKL_AUTO_LABEL() , nens , YAKL_LAMBDA (int iens) {
+      hyPressureGLL(0,0,iens) = p_0;
+      for (int k=0; k < nz; k++) {
+        for (int kk=0; kk < ngll-1; kk++) {
+          real tot = 0;
+          for (int kkk=0; kkk < ngll; kkk++) {
+            tot += quad_temp(k,kk,kkk,iens) * gll_wts(kkk);
+          }
+          tot *= dz(k,iens) * ( gll_pts(kk+1) - gll_pts(kk) );
+          hyPressureGLL(k,kk+1,iens) = hyPressureGLL(k,kk,iens) * exp( tot );
+          if (kk == ngll-2 && k < nz-1) {
+            hyPressureGLL(k+1,0,iens) = hyPressureGLL(k,ngll-1,iens);
+          }
+        }
+      }
+    });
+
+    // Compute hydrostatic background state at GLL points
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<3>(nz,ngll,nens) , YAKL_LAMBDA (int k, int kk, int iens) {
+      real zloc = zmid(k,iens) + gll_pts(kk)*dz(k,iens);
+      real temp       = init_supercell_temperature (zloc, z_0, z_trop, zint(nz,iens), T_0, T_trop, T_top);
+      real press_tmp  = init_supercell_pressure_dry(zloc, z_0, z_trop, zint(nz,iens), T_0, T_trop, T_top, p_0, R_d, grav);
+      real qvs        = init_supercell_sat_mix_dry(press_tmp, temp);
+      real relhum     = init_supercell_relhum(zloc, z_0, z_trop);
+      if (relhum * qvs > 0.014_fp) relhum = 0.014_fp / qvs;
+      real qv         = std::min( 0.014_fp , qvs*relhum );
+      real press      = hyPressureGLL(k,kk,iens);
+      real dens_dry   = press / (R_d+qv*R_v) / temp;
+      real dens_vap   = qv * dens_dry;
+      real dens       = dens_dry + dens_vap;
+      real dens_theta = pow( press / C0 , 1._fp / gamma );
+      hyDensGLL     (k,kk,iens) = dens;
+      hyDensThetaGLL(k,kk,iens) = dens_theta;
+      hyDensVapGLL  (k,kk,iens) = dens_vap;
+    });
+
+    // Compute hydrostatic background state over cells
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
+      real press_tot      = 0;
+      real dens_tot       = 0;
+      real dens_vap_tot   = 0;
+      real dens_theta_tot = 0;
+      for (int kk=0; kk < ngll; kk++) {
+        press_tot      += hyPressureGLL (k,kk,iens) * gll_wts(kk);
+        dens_tot       += hyDensGLL     (k,kk,iens) * gll_wts(kk);
+        dens_vap_tot   += hyDensVapGLL  (k,kk,iens) * gll_wts(kk);
+        dens_theta_tot += hyDensThetaGLL(k,kk,iens) * gll_wts(kk);
+      }
+      real press      = press_tot;
+      real dens       = dens_tot;
+      real dens_vap   = dens_vap_tot;
+      real dens_dry   = dens - dens_vap;
+      real R          = dens_dry / dens * R_d + dens_vap / dens * R_v;
+      real temp       = press / (dens * R);
+      real qv         = dens_vap / dens_dry;
+      real zloc       = (k+0.5_fp)*dz(k,iens);
+      real press_tmp  = init_supercell_pressure_dry(zloc, z_0, z_trop, zint(nz,iens), T_0, T_trop, T_top, p_0, R_d, grav);
+      real qvs        = init_supercell_sat_mix_dry(press_tmp, temp);
+      real relhum     = qv / qvs;
+      real T          = temp - 273;
+      real a          = 17.27;
+      real b          = 237.7;
+      real tdew       = b * ( a*T / (b + T) + log(relhum) ) / ( a - ( a*T / (b+T) + log(relhum) ) );
+      // These are used in the rest of the model
+      for (int iens=0; iens < nens; iens++) {
+        hy_dens_cells    (k,iens) = dens;
+        hy_pressure_cells(k,iens) = press;
+      }
+    });
+
+    // Initialize the state
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      state(idR,hs+k,hs+j,hs+i,iens) = 0;
+      state(idU,hs+k,hs+j,hs+i,iens) = 0;
+      state(idV,hs+k,hs+j,hs+i,iens) = 0;
+      state(idW,hs+k,hs+j,hs+i,iens) = 0;
+      state(idT,hs+k,hs+j,hs+i,iens) = 0;
+      for (int tr=0; tr < num_tracers; tr++) { tracers(tr,hs+k,hs+j,hs+i,iens) = 0; }
+      real pres = hy_pressure_cells(k,iens);
+      state(idR,hs+k,hs+j,hs+i,iens) = hy_dens_cells(k,iens);
+      state(idT,hs+k,hs+j,hs+i,iens) = std::pow( pres/C0 , 1._fp/gamma );
+      for (int kk=0; kk < ngll; kk++) {
+        for (int jj=0; jj < ngll; jj++) {
+          for (int ii=0; ii < ngll; ii++) {
+            real xloc = (i+0.5_fp)*dx + gll_pts(ii)*dx;
+            real yloc = (j+0.5_fp)*dy + gll_pts(jj)*dy;
+            real zloc = zmid(k,iens)  + gll_pts(kk)*dz(k,iens);
+
+            if (sim2d) yloc = ylen/2;
+
+            real constexpr zs = 5000;
+            real constexpr us = 30;
+            real constexpr uc = 15;
+            real uvel = zloc < zs  ?  us*(zloc/zs)-uc  :  us-uc;
+            real vvel       = 0;
+            real wvel       = 0;
+            real dens_vap   = hyDensVapGLL(k,kk,iens);
+
+            real factor = gll_wts(ii) * gll_wts(jj) * gll_wts(kk);
+            state  (idU ,hs+k,hs+j,hs+i,iens) += state(idR,hs+k,hs+j,hs+i,iens) * uvel  * factor;
+            state  (idV ,hs+k,hs+j,hs+i,iens) += state(idR,hs+k,hs+j,hs+i,iens) * vvel  * factor;
+            state  (idW ,hs+k,hs+j,hs+i,iens) += state(idR,hs+k,hs+j,hs+i,iens) * wvel  * factor;
+            tracers(idWV,hs+k,hs+j,hs+i,iens) += dens_vap                               * factor;
+          }
+        }
+      }
+    });
+  }
+
+
+
+  // Convert dynamics state and tracers arrays to the coupler state and write to the coupler's data
+  void convert_dynamics_to_coupler( pam::PamCoupler &coupler ,
+                                    realConst5d state        ,
+                                    realConst5d tracers      ) const {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    auto nens        = coupler.get_nens();
+    auto nx          = coupler.get_nx();
+    auto ny          = coupler.get_ny();
+    auto nz          = coupler.get_nz();
+    auto R_d         = coupler.get_option<real>("R_d"    );
+    auto R_v         = coupler.get_option<real>("R_v"    );
+    auto gamma       = coupler.get_option<real>("gamma_d");
+    auto C0          = coupler.get_option<real>("C0"     );
+    auto idWV        = coupler.get_option<int >("idWV");
+    auto num_tracers = coupler.get_num_tracers();
+
+    auto &dm = coupler.get_data_manager_device_readwrite();
+    auto tracer_adds_mass = dm.get<bool const,1>("tracer_adds_mass");
+
+    // Get state from the coupler
+    auto dm_rho_d = dm.get<real,4>("density_dry");
+    auto dm_uvel  = dm.get<real,4>("uvel"       );
+    auto dm_vvel  = dm.get<real,4>("vvel"       );
+    auto dm_wvel  = dm.get<real,4>("wvel"       );
+    auto dm_temp  = dm.get<real,4>("temp"       );
+
+    // Get tracers from the coupler
+    pam::MultiField<real,4> dm_tracers;
+    auto tracer_names = coupler.get_tracer_names();
+    for (int tr=0; tr < num_tracers; tr++) { dm_tracers.add_field( dm.get<real,4>(tracer_names[tr]) ); }
+
+    // Convert from state and tracers arrays to the coupler's data
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      real rho   = state(idR,hs+k,hs+j,hs+i,iens);
+      real u     = state(idU,hs+k,hs+j,hs+i,iens) / rho;
+      real v     = state(idV,hs+k,hs+j,hs+i,iens) / rho;
+      real w     = state(idW,hs+k,hs+j,hs+i,iens) / rho;
+      real theta = state(idT,hs+k,hs+j,hs+i,iens) / rho;
+      real press = C0 * pow( rho*theta , gamma );
+      real rho_v = tracers(idWV,hs+k,hs+j,hs+i,iens);
+      real rho_d = rho;
+      for (int tr=0; tr < num_tracers; tr++) { if (tracer_adds_mass(tr)) rho_d -= tracers(tr,hs+k,hs+j,hs+i,iens); }
+      real temp = press / ( rho_d * R_d + rho_v * R_v );
+      dm_rho_d(k,j,i,iens) = rho_d;
+      dm_uvel (k,j,i,iens) = u;
+      dm_vvel (k,j,i,iens) = v;
+      dm_wvel (k,j,i,iens) = w;
+      dm_temp (k,j,i,iens) = temp;
+      for (int tr=0; tr < num_tracers; tr++) { dm_tracers(tr,k,j,i,iens) = tracers(tr,hs+k,hs+j,hs+i,iens); }
+    });
+  }
+
+
+
+  // Convert coupler's data to state and tracers arrays
+  void convert_coupler_to_dynamics( pam::PamCoupler &coupler ,
+                                    real5d &state            ,
+                                    real5d &tracers          ) const {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    auto nens        = coupler.get_nens();
+    auto nx          = coupler.get_nx();
+    auto ny          = coupler.get_ny();
+    auto nz          = coupler.get_nz();
+    auto R_d         = coupler.get_option<real>("R_d"    );
+    auto R_v         = coupler.get_option<real>("R_v"    );
+    auto gamma_d     = coupler.get_option<real>("gamma_d");
+    auto C0          = coupler.get_option<real>("C0"     );
+    auto idWV        = coupler.get_option<int >("idWV");
+    auto grav        = coupler.get_option<real>("grav");
+    auto num_tracers = coupler.get_num_tracers();
+
+    auto &dm = coupler.get_data_manager_device_readwrite();
+    auto tracer_adds_mass = dm.get<bool const,1>("tracer_adds_mass");
+
+    // Get the coupler's state (as const because it's read-only)
+    auto dm_rho_d = dm.get<real const,4>("density_dry");
+    auto dm_uvel  = dm.get<real const,4>("uvel"       );
+    auto dm_vvel  = dm.get<real const,4>("vvel"       );
+    auto dm_wvel  = dm.get<real const,4>("wvel"       );
+    auto dm_temp  = dm.get<real const,4>("temp"       );
+
+    // Get the coupler's tracers (as const because it's read-only)
+    pam::MultiField<real const,4> dm_tracers;
+    auto tracer_names = coupler.get_tracer_names();
+    for (int tr=0; tr < num_tracers; tr++) { dm_tracers.add_field( dm.get<real const,4>(tracer_names[tr]) ); }
+
+    // Convert from the coupler's state to the dycore's state and tracers arrays.
+    // Compute domain-averaged pressure column
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      real rho_d = dm_rho_d(k,j,i,iens);
+      real u     = dm_uvel (k,j,i,iens);
+      real v     = dm_vvel (k,j,i,iens);
+      real w     = dm_wvel (k,j,i,iens);
+      real temp  = dm_temp (k,j,i,iens);
+      real rho_v = dm_tracers(idWV,k,j,i,iens);
+      real press = rho_d * R_d * temp + rho_v * R_v * temp;
+      real rho   = rho_d;
+      for (int tr=0; tr < num_tracers; tr++) { if (tracer_adds_mass(tr)) rho += dm_tracers(tr,k,j,i,iens); }
+      real theta = pow( press/C0 , 1._fp / gamma_d ) / rho;
+      state(idR,hs+k,hs+j,hs+i,iens) = rho;
+      state(idU,hs+k,hs+j,hs+i,iens) = rho * u;
+      state(idV,hs+k,hs+j,hs+i,iens) = rho * v;
+      state(idW,hs+k,hs+j,hs+i,iens) = rho * w;
+      state(idT,hs+k,hs+j,hs+i,iens) = rho * theta;
+      for (int tr=0; tr < num_tracers; tr++) { tracers(tr,hs+k,hs+j,hs+i,iens) = dm_tracers(tr,k,j,i,iens); }
+    });
+  }
+
+
+
+  void declare_current_profile_as_hydrostatic( pam::PamCoupler &coupler , bool use_gcm_data = false ) const {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    auto nens        = coupler.get_nens();
+    auto nx          = coupler.get_nx();
+    auto ny          = coupler.get_ny();
+    auto nz          = coupler.get_nz();
+    auto gamma_d     = coupler.get_option<real>("gamma_d");
+    auto C0          = coupler.get_option<real>("C0"     );
+    auto idWV        = coupler.get_option<int >("idWV"   );
+    auto R_d         = coupler.get_option<real>("R_d");
+    auto R_v         = coupler.get_option<real>("R_v");
+    auto num_tracers = coupler.get_num_tracers();
+    auto &dm                   = coupler.get_data_manager_device_readwrite();
+    auto dz                    = dm.get<real const,2>("vertical_cell_dz");
+    auto vert_weno_recon_lower = dm.get<real const,5>("vert_weno_recon_lower");
+    auto vert_sten_to_coefs    = dm.get<real const,4>("vert_sten_to_coefs");
+
+    auto grav_balance = coupler.get_option<bool>("balance_hydrostasis_with_gravity");
+
+    real5d state  ("state"  ,num_state  ,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+    real5d tracers("tracers",num_tracers,nz+2*hs,ny+2*hs,nx+2*hs,nens);
+
+    if (use_gcm_data) {
+      auto gcm_rho_d = dm.get<real const,2>("gcm_density_dry");
+      auto gcm_temp  = dm.get<real const,2>("gcm_temp"       );
+      auto gcm_rho_v = dm.get<real const,2>("gcm_water_vapor");
+      auto gcm_rho_c = dm.get<real const,2>("gcm_cloud_water");
+      auto gcm_rho_i = dm.get<real const,2>("gcm_cloud_ice"  );
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        real rho_d     = gcm_rho_d(k,iens);
+        real rho_v     = gcm_rho_v(k,iens);
+        real rho       = gcm_rho_d(k,iens) + gcm_rho_v(k,iens) + gcm_rho_c(k,iens) + gcm_rho_i(k,iens);
+        real temp      = gcm_temp(k,iens);
+        real p         = (rho_d*R_d + rho_v*R_v)*temp;
+        real rho_theta = std::pow( p/C0 , 1._fp/gamma_d );
+        state(idR,hs+k,hs+j,hs+i,iens) = rho;
+        state(idU,hs+k,hs+j,hs+i,iens) = 0;
+        state(idV,hs+k,hs+j,hs+i,iens) = 0;
+        state(idW,hs+k,hs+j,hs+i,iens) = 0;
+        state(idT,hs+k,hs+j,hs+i,iens) = rho_theta;
+        for (int tr=0; tr < num_tracers; tr++) { tracers(tr,hs+k,hs+j,hs+i,iens) = 0; }
+      });
+    } else {
+      convert_coupler_to_dynamics( coupler , state , tracers );
+    }
+    
+    if (grav_balance) {
+
+      SArray<real,2,ord,2> coefs_to_gll;
+      SArray<real,1,hs+1> idl;
+      real                sigma;
+      TransformMatrices::coefs_to_gll_lower(coefs_to_gll );
+      weno::wenoSetIdealSigma<ord>(idl,sigma);
+
+      auto grav_var = coupler.get_data_manager_device_readwrite().get<real,2>("variable_gravity");
+      // Discretize interface pressure spatially exactly as we would discretize it in the solver
+      real4d pressure("pressure",nz+2*hs,ny+2*hs,nx+2*hs,nens);
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        pressure(hs+k,hs+j,hs+i,iens) = C0*std::pow(state(idT,hs+k,hs+j,hs+i,iens),gamma_d);
+        state(idT,hs+k,hs+j,hs+i,iens) /= state(idR,hs+k,hs+j,hs+i,iens);
+        if (j == 0 && i == 0) grav_var(k,iens) = 0;
+      });
+      halo_exchange( coupler , state , tracers , pressure );
+      real4d pint("pint",nz+1,ny,nx,nens);
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz+1,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        SArray<real,1,ord> stencil;
+        SArray<real,2,ord,ord>  s2c_loc[2];
+        SArray<real,3,hs,hs,hs> wrl_loc[2];
+        for (int i1=0; i1 < ord; i1++) {
+          for (int i2=0; i2 < ord; i2++) {
+            s2c_loc[0](i1,i2) = vert_sten_to_coefs(k  ,i1,i2,iens);
+            s2c_loc[1](i1,i2) = vert_sten_to_coefs(k+1,i1,i2,iens);
+          }
+        }
+        for (int i1=0; i1 < hs; i1++) {
+          for (int i2=0; i2 < hs; i2++) {
+            for (int i3=0; i3 < hs; i3++) {
+              wrl_loc[0](i1,i2,i3) = vert_weno_recon_lower(k  ,i1,i2,i3,iens);
+              wrl_loc[1](i1,i2,i3) = vert_weno_recon_lower(k+1,i1,i2,i3,iens);
+            }
+          }
+        }
+        int k_upw = 0;
+        for (int s=0; s < ord; s++) { stencil(s) = pressure(k+k_upw+s,hs+j,hs+i,iens); }
+        real pp_L = reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+        k_upw = 1;
+        for (int s=0; s < ord; s++) { stencil(s) = pressure(k+k_upw+s,hs+j,hs+i,iens); }
+        real pp_R = reconstruct(stencil,coefs_to_gll,s2c_loc[k_upw],wrl_loc[k_upw],idl,sigma,1-k_upw);
+        pint(k,j,i,iens) = 0.5_fp * (pp_L + pp_R);
+      });
+      // Compute average column of variable gravity that provides exact hydrostatic balance
+      real r_nx_ny = 1./(nx*ny);
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        real dens = state(idR,hs+k,hs+j,hs+i,iens);
+        yakl::atomicAdd( grav_var(k,iens) , -(pint(k+1,j,i,iens)-pint(k,j,i,iens))/(dens*dz(k,iens))*r_nx_ny );
+      });
+
+    } else {
+
+      auto hy_dens_cells     = coupler.get_data_manager_device_readwrite().get<real,2>("hy_dens_cells");
+      auto hy_pressure_cells = coupler.get_data_manager_device_readwrite().get<real,2>("hy_pressure_cells");
+      hy_dens_cells     = 0;
+      hy_pressure_cells = 0;
+      real r_nx_ny = 1./(nx*ny);
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        real press = C0 * std::pow( state(idT,hs+k,hs+j,hs+i,iens) , gamma_d );
+        yakl::atomicAdd( hy_pressure_cells(k,iens) , press                         *r_nx_ny );
+        yakl::atomicAdd( hy_dens_cells    (k,iens) , state(idR,hs+k,hs+j,hs+i,iens)*r_nx_ny );
+      });
+
+    }
+  }
+
+
+
+  void solve_banded( real3d const &diags , real2d const &rhs ) const {
+    int nbands = diags.extent(0);
+    int n      = diags.extent(1);
+    int nens   = diags.extent(2);
+    if (nbands % 2 != 1)       yakl::yakl_throw("ERROR: Number of bands must be odd");
+    if (n != rhs.extent(0))    yakl::yakl_throw("ERROR: Number of elements for diags and rhs must match");
+    if (nens != rhs.extent(1)) yakl::yakl_throw("ERROR: Number of ensembles must match between diags and rhs");
+    int h = (nbands-1)/2;
+    parallel_for( YAKL_AUTO_LABEL() , nens , YAKL_LAMBDA (int iens) {
+      // Clear out lower values
+      for (int idiag = 0 ; idiag < n-1; idiag++) {
+        real r_diag_val = 1._fp / diags(h,idiag,iens);
+        for (int irow2 = idiag+1; irow2 <= std::min(n-1,idiag+h); irow2++) {
+          int irow2_pert = irow2-idiag;
+          real mult = -diags(h-irow2_pert,irow2,iens) * r_diag_val;
+          for (int icol = idiag; icol <= std::min(n-1,idiag+h); icol++) {
+            int icol_pert = icol - idiag;
+            diags(h+icol_pert-irow2_pert,irow2,iens) += mult*diags(h+icol_pert,idiag,iens);
+          }
+          rhs(irow2,iens) += mult*rhs(idiag,iens);
+        }
+      }
+      // Clear out upper values
+      for (int idiag = n-1 ; idiag >= 1; idiag--) {
+        real r_diag_val = 1._fp / diags(h,idiag,iens);
+        for (int irow2 = idiag-1; irow2 >= std::max(0,idiag-h); irow2--) {
+          rhs(irow2,iens) -= diags(h-irow2+idiag,irow2,iens) * r_diag_val*rhs(idiag,iens);
+        }
+      }
+      // Divide by the diagonal values
+      for (int idiag = 0; idiag < n; idiag++) { rhs(idiag,iens) /= diags(h,idiag,iens); }
+    });
+  }
+
+
+
+  char const * dycore_name() const { return "SSPRK3+WENO+FV A-grid"; }
+
+
+
+  void finalize( pam::PamCoupler const &coupler ) const { }
+
+};
 
diff --git a/dynamics/awfl/Spatial_expl.h b/dynamics/awfl/Spatial_expl.h
deleted file mode 100644
index 4ccbfd1d..00000000
--- a/dynamics/awfl/Spatial_expl.h
+++ /dev/null
@@ -1,2228 +0,0 @@
-
-#pragma once
-
-#include "awfl_const.h"
-#include "TransformMatrices.h"
-#include "TransformMatrices_variable.h"
-#include "WenoLimiter.h"
-#include "idealized_profiles.h"
-#include "MultipleFields.h"
-#include "pam_coupler.h"
-
-
-template <int nTimeDerivs, bool timeAvg, int nAder>
-class Spatial_operator {
-public:
-
-  static_assert(nTimeDerivs == 1 , "ERROR: This Spatial class isn't setup to use nTimeDerivs > 1");
-
-  int static constexpr hs = (ord-1)/2;
-  int static constexpr num_state = 5;
-  int static constexpr max_tracers = 50;
-
-  real Rd   ;
-  real cp   ;
-  real gamma;
-  real p0   ;
-  real C0   ;
-  real Rv   ;
-  real grav ;
-
-  int idWV;  // Tracer index for water vapor (set in add_tracer by testing the tracer name against "water_vapor"
-
-  real hydrostasis_parameters_sum;
-
-  typedef real5d StateArr;  // Array of state variables (rho, rho*u, rho*v, rho*w, and rho*theta)
-  typedef real5d TracerArr; // Array of tracers (total tracer mass)
-
-  typedef real5d StateTendArr;   // State tendencies
-  typedef real5d TracerTendArr;  // Tracer tendencies
-
-  // Hydrostatically balanced values for density, potential temperature, and pressure (cell-averages)
-  real3d hyDensSten;
-  real3d hyDensThetaSten;
-
-  // Hydrostatically balanced values for density, potential temperature, and pressure (GLL points)
-  real3d hyDensGLL;
-  real3d hyPressureGLL;
-  real3d hyDensThetaGLL;
-
-  // Matrices to transform DOFs from one form to another
-  SArray<real,2,ord,ngll> coefs_to_gll;
-  SArray<real,2,ord,ngll> coefs_to_deriv_gll;
-  SArray<real,2,ord,ngll> sten_to_gll;
-  SArray<real,2,ord,ord > sten_to_coefs;
-  SArray<real,2,ord,ngll> sten_to_deriv_gll;
-  // WENO reconstruction matrices
-  SArray<real,3,hs+1,hs+1,hs+1> weno_recon_lower;
-  SArray<real,1,hs+2> idl;   // Ideal weights for WENO
-  real sigma;                // WENO sigma parameter (handicap high-order TV estimate)
-  // For ADER spatial derivative computation (ngll points -> coefs -> deriv -> ngll points)
-  SArray<real,2,ngll,ngll> derivMatrix;
-  // For quadrature
-  SArray<real,1,ord> gllWts_ord;
-  SArray<real,1,ord> gllPts_ord;
-  SArray<real,1,ngll> gllWts_ngll;
-  SArray<real,1,ngll> gllPts_ngll;
-
-  real2d vert_interface;
-  real2d vert_interface_ghost;
-  real3d vert_locs_normalized;
-  real2d dz;
-  real2d dz_ghost;
-  real4d vert_sten_to_gll;
-  real4d vert_sten_to_coefs;
-  real5d vert_weno_recon_lower;
-
-  // For indexing into the state and state tendency arrays
-  int static constexpr idR = 0;  // density perturbation
-  int static constexpr idU = 1;  // u
-  int static constexpr idV = 2;  // v
-  int static constexpr idW = 3;  // w
-  int static constexpr idT = 4;  // potential temperature perturbation
-
-  // The two boundary condition options for each direction
-  int static constexpr BC_PERIODIC = 0;
-  int static constexpr BC_WALL     = 1;
-
-  // Options for initializing the data
-  int static constexpr DATA_SPEC_EXTERNAL      = 0;
-  int static constexpr DATA_SPEC_THERMAL       = 1;
-  int static constexpr DATA_SPEC_SUPERCELL     = 2;
-
-  bool sim2d;  // Whether we're simulating in 2-D
-
-  // Grid spacing in each dimension
-  real dx;
-  real dy;
-
-  // Initial time step (used throughout the simulation)
-  real dtInit;
-
-  // Which direction we're passing through for a given time step (x,y,z)  or (z,y,x)
-  // For Strang splitting
-  bool dimSwitch;
-
-  int num_tracers;  // Number of tracers
-  std::vector<std::string> tracer_name;      // Name of each tracer
-  std::vector<std::string> tracer_desc;      // Description of each tracer
-  bool1d                   tracer_pos;       // Whether each tracer is positive-definite
-  bool1d                   tracer_adds_mass; // Whether each tracer adds mass (otherwise it's passive)
-
-  //////////////////////////////////////
-  // Values read from input file
-  //////////////////////////////////////
-  // Number of ensembles to simulate at the same time
-  int         nens;
-  // Number of cells in each direction
-  int         nx;
-  int         ny;
-  int         nz;
-  // Length of the domain in each direction (m)
-  real        xlen;
-  real        ylen;
-  real1d      zbot;
-  real1d      ztop;
-  // Whether to use WENO for scalars and also for winds
-  bool        weno_scalars;
-  bool        weno_winds;
-  // How to initialize the data
-  int         data_spec;
-
-
-  // When this class is created, initialize num_tracers to zero
-  Spatial_operator() {
-    num_tracers = 0;
-    idWV = -1;
-  }
-
-
-
-  // Make sure it's odd-order-accurate
-  static_assert(ord%2 == 1,"ERROR: ord must be an odd integer");
-
-  void convert_dynamics_to_coupler_state( pam::PamCoupler &coupler , realConst5d state , realConst5d tracers ) const {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    auto &dm = coupler.get_data_manager_device_readwrite();
-
-    real4d dm_dens_dry = dm.get<real,4>( "density_dry"      );
-    real4d dm_uvel     = dm.get<real,4>( "uvel"             );
-    real4d dm_vvel     = dm.get<real,4>( "vvel"             );
-    real4d dm_wvel     = dm.get<real,4>( "wvel"             );
-    real4d dm_temp     = dm.get<real,4>( "temp"             );
-
-    YAKL_SCOPE( C0               , this->C0               );
-    YAKL_SCOPE( gamma            , this->gamma            );
-    YAKL_SCOPE( num_tracers      , this->num_tracers      );
-    YAKL_SCOPE( Rd               , this->Rd               );
-    YAKL_SCOPE( Rv               , this->Rv               );
-    YAKL_SCOPE( tracer_adds_mass , this->tracer_adds_mass );
-    YAKL_SCOPE( idWV , this->idWV );
-
-    pam::MultipleFields<max_tracers,real4d> dm_tracers;
-    for (int tr = 0; tr < num_tracers; tr++) {
-      auto trac = dm.get<real,4>( tracer_name[tr] );
-      dm_tracers.add_field( trac );
-    }
-
-    parallel_for( "Spatial.h d2c" , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-      real dens  = state(idR,hs+k,hs+j,hs+i,iens);
-      real uvel  = state(idU,hs+k,hs+j,hs+i,iens) / dens;
-      real vvel  = state(idV,hs+k,hs+j,hs+i,iens) / dens;
-      real wvel  = state(idW,hs+k,hs+j,hs+i,iens) / dens;
-      real theta = state(idT,hs+k,hs+j,hs+i,iens) / dens;
-      real pressure = C0 * pow( dens*theta , gamma );
-      real dens_vap = tracers(idWV,hs+k,hs+j,hs+i,iens);
-      real dens_dry = dens;
-      for (int tr=0; tr < num_tracers; tr++) {
-        if (tracer_adds_mass(tr)) dens_dry -= tracers(tr,hs+k,hs+j,hs+i,iens);
-      }
-      real temp = pressure / ( dens_dry * Rd + dens_vap * Rv );
-      dm_dens_dry(k,j,i,iens) = dens_dry;
-      dm_uvel    (k,j,i,iens) = uvel;
-      dm_vvel    (k,j,i,iens) = vvel;
-      dm_wvel    (k,j,i,iens) = wvel;
-      dm_temp    (k,j,i,iens) = temp;
-      for (int tr=0; tr < num_tracers; tr++) {
-        dm_tracers(tr,k,j,i,iens) = tracers(tr,hs+k,hs+j,hs+i,iens);
-      }
-    });
-  }
-
-
-
-  YAKL_INLINE static real hydrostatic_dens_theta( realConst3d hy_params , real z , real z0 , real dz ,
-                                                  int k, int iens , real C0 , real gamma ) {
-    real p = pam::hydrostatic_pressure( hy_params , z , z0 , dz , k , iens );
-    // p = C0*(rho*theta)^gamma
-    return pow(p/C0,1._fp/gamma);
-  }
-
-
-
-  void convert_coupler_state_to_dynamics( pam::PamCoupler const &coupler , real5d const &state , real5d const &tracers ) {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    auto &dm = coupler.get_data_manager_device_readonly();
-    auto hy_params = dm.get<real const,3>("hydrostasis_parameters");
-
-    YAKL_SCOPE( hyDensSten           , this->hyDensSten           );
-    YAKL_SCOPE( hyDensThetaSten      , this->hyDensThetaSten      );
-    YAKL_SCOPE( hyPressureGLL        , this->hyPressureGLL        );
-    YAKL_SCOPE( hyDensGLL            , this->hyDensGLL            );
-    YAKL_SCOPE( hyDensThetaGLL       , this->hyDensThetaGLL       );
-    YAKL_SCOPE( C0                   , this->C0                   );
-    YAKL_SCOPE( gamma                , this->gamma                );
-    YAKL_SCOPE( num_tracers          , this->num_tracers          );
-    YAKL_SCOPE( Rd                   , this->Rd                   );
-    YAKL_SCOPE( Rv                   , this->Rv                   );
-    YAKL_SCOPE( tracer_adds_mass     , this->tracer_adds_mass     );
-    YAKL_SCOPE( zbot                 , this->zbot                 );
-    YAKL_SCOPE( ztop                 , this->ztop                 );
-    YAKL_SCOPE( gllPts_ngll          , this->gllPts_ngll          );
-    YAKL_SCOPE( dz                   , this->dz                   );
-    YAKL_SCOPE( vert_interface       , this->vert_interface       );
-    YAKL_SCOPE( idWV                 , this->idWV                 );
-    YAKL_SCOPE( grav                 , this->grav                 );
-    YAKL_SCOPE( vert_interface_ghost , this->vert_interface_ghost );
-    YAKL_SCOPE( dz_ghost             , this->dz_ghost             );
-
-    auto dm_dens_dry = dm.get<real const,4>( "density_dry"      );
-    auto dm_uvel     = dm.get<real const,4>( "uvel"             );
-    auto dm_vvel     = dm.get<real const,4>( "vvel"             );
-    auto dm_wvel     = dm.get<real const,4>( "wvel"             );
-    auto dm_temp     = dm.get<real const,4>( "temp"             );
-
-    pam::MultipleFields<max_tracers,realConst4d> dm_tracers;
-    for (int tr = 0; tr < num_tracers; tr++) {
-      auto trac = dm.get<real const,4>( tracer_name[tr] );
-      dm_tracers.add_field( trac );
-    }
-
-    // If hydrostasis in the coupler has changed, then we need to re-compute
-    // hydrostatically balanced cells and GLL points for the dycore's time step
-    real tmp = yakl::intrinsics::sum(hy_params);
-    if (tmp != hydrostasis_parameters_sum) {
-      SArray<real,1,9> gll_pts, gll_wts;
-      TransformMatrices::get_gll_points ( gll_pts );
-      TransformMatrices::get_gll_weights( gll_wts );
-
-      // Compute new cell averages and GLL point values for hydrostasis
-      hydrostasis_parameters_sum = tmp;
-      parallel_for( "Spatial.h new hydrostasis" , SimpleBounds<3>(nz,ord,nens) , YAKL_LAMBDA (int k, int kk, int iens) {
-        real r  = 0;
-        real rt = 0;
-        for (int l=0; l < 9; l++) {
-          real zloc = vert_interface_ghost(k+kk,iens) + 0.5_fp*dz_ghost(k+kk,iens) + gll_pts(l)*dz_ghost(k+kk,iens);
-          real z0   = vert_interface(k,iens) + 0.5_fp*dz(k,iens);
-          real wt = gll_wts(l);
-          r  += pam::hydrostatic_density   (hy_params,zloc,z0,dz(k,iens),k,iens         ,grav) * wt;
-          rt +=      hydrostatic_dens_theta(hy_params,zloc,z0,dz(k,iens),k,iens,C0,gamma     ) * wt;
-        }
-        hyDensSten     (k,kk,iens) = r;
-        hyDensThetaSten(k,kk,iens) = rt;
-      });
-
-      parallel_for( "Spatial.h new hydrostasis" , SimpleBounds<3>(nz,ngll,nens) , YAKL_LAMBDA (int k, int kk, int iens) {
-        real zloc = vert_interface(k,iens) + 0.5_fp*dz(k,iens) + gllPts_ngll(kk)*dz(k,iens);
-        real z0   = vert_interface(k,iens) + 0.5_fp*dz(k,iens);
-        hyPressureGLL (k,kk,iens) = pam::hydrostatic_pressure  (hy_params,zloc,z0,dz(k,iens),k,iens              );
-        hyDensGLL     (k,kk,iens) = pam::hydrostatic_density   (hy_params,zloc,z0,dz(k,iens),k,iens         ,grav);
-        hyDensThetaGLL(k,kk,iens) =      hydrostatic_dens_theta(hy_params,zloc,z0,dz(k,iens),k,iens,C0,gamma     );
-      });
-    }
-
-    parallel_for(  "Spatial.h c2d" , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-      for (int tr=0; tr < num_tracers; tr++) {
-        tracers(tr,hs+k,hs+j,hs+i,iens) = dm_tracers(tr,k,j,i,iens);
-      }
-      real dens_dry = dm_dens_dry    (k,j,i,iens);
-      real uvel     = dm_uvel        (k,j,i,iens);
-      real vvel     = dm_vvel        (k,j,i,iens);
-      real wvel     = dm_wvel        (k,j,i,iens);
-      real temp     = dm_temp        (k,j,i,iens);
-      real dens_vap = tracers(idWV,hs+k,hs+j,hs+i,iens);
-      real dens     = dens_dry;
-      for (int tr=0; tr < num_tracers; tr++) {
-        if (tracer_adds_mass(tr)) dens += tracers(tr,hs+k,hs+j,hs+i,iens);
-      }
-      real pressure = dens_dry * Rd * temp + dens_vap * Rv * temp;
-      real theta    = pow( pressure / C0 , 1._fp / gamma ) / dens;
-      state(idR,hs+k,hs+j,hs+i,iens) = dens;
-      state(idU,hs+k,hs+j,hs+i,iens) = dens * uvel;
-      state(idV,hs+k,hs+j,hs+i,iens) = dens * vvel;
-      state(idW,hs+k,hs+j,hs+i,iens) = dens * wvel;
-      state(idT,hs+k,hs+j,hs+i,iens) = dens * theta;
-    });
-  }
-
-
-
-  real5d createStateArr() const {
-    return real5d("stateArr",num_state,nz+2*hs,ny+2*hs,nx+2*hs,nens);
-  }
-
-
-
-  real5d createTracerArr() const {
-    return real5d("tracerArr",num_tracers,nz+2*hs,ny+2*hs,nx+2*hs,nens);
-  }
-
-
-
-  real5d createStateTendArr() const {
-    return real5d("stateTendArr",num_state,nz,ny,nx,nens);
-  }
-
-
-
-  real5d createTracerTendArr() const {
-    return real5d("tracerTendArr",num_tracers,nz,ny,nx,nens);
-  }
-
-
-
-  // Number of operator splittinng steps to use
-  // Normally this would be 3, but the z-directly CFL is reduced because of how the fluxes are
-  // handled in the presence of a solid wall boundary condition. I'm looking into how to fix this
-  int numSplit() const {
-    if (sim2d) {
-      return 2;
-    } else {
-      return 3;
-    }
-  }
-
-
-
-  // Given the model data and CFL value, compute the maximum stable time step
-  real compute_time_step(pam::PamCoupler const &coupler, real cfl_in = -1) {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    real cfl = cfl_in;
-    if (cfl < 0) cfl = 0.75;
-
-    // If we've already computed the time step, then don't compute it again
-    if (dtInit <= 0) {
-      YAKL_SCOPE( dx                   , this->dx                  );
-      YAKL_SCOPE( dy                   , this->dy                  );
-      YAKL_SCOPE( dz                   , this->dz                  );
-      YAKL_SCOPE( gamma                , this->gamma               );
-      YAKL_SCOPE( Rd                   , this->Rd                  );
-      YAKL_SCOPE( Rv                   , this->Rv                  );
-      YAKL_SCOPE( sim2d                , this->sim2d               );
-
-      auto &dm = coupler.get_data_manager_device_readonly();
-
-      // Convert data from DataManager to state and tracers array for convenience
-      auto dm_dens_dry = dm.get<real const,4>( "density_dry" );
-      auto dm_uvel     = dm.get<real const,4>( "uvel"        );
-      auto dm_vvel     = dm.get<real const,4>( "vvel"        );
-      auto dm_wvel     = dm.get<real const,4>( "wvel"        );
-      auto dm_temp     = dm.get<real const,4>( "temp"        );
-      auto dm_dens_vap = dm.get<real const,4>( "water_vapor" );
-
-      // Allocate a 3-D array for the max stable time steps (we'll use this for a reduction later)
-      real4d dt3d("dt3d",nz,ny,nx,nens);
-
-      // Loop through the cells, calculate the max stable time step for each cell
-      parallel_for( "Spatial.h compute_time_step" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                    YAKL_LAMBDA (int k, int j, int i, int iens) {
-        real rho_d = dm_dens_dry(k,j,i,iens);
-        real u     = dm_uvel    (k,j,i,iens);
-        real v     = dm_vvel    (k,j,i,iens);
-        real w     = dm_wvel    (k,j,i,iens);
-        real temp  = dm_temp    (k,j,i,iens);
-        real rho_v = dm_dens_vap(k,j,i,iens);
-        real p = Rd * rho_d * temp + Rv * rho_v * temp;
-        // This neglects non-wv mass-adding tracers, but these are small, and their lack only increases cs
-        // Thus the resulting time step is conservative w/r to these missing masses, which is more stable
-        real cs = sqrt(gamma*p/(rho_v+rho_d));
-
-        // Compute the maximum stable time step in each direction
-        real udt = cfl * dx         / max( abs(u-cs) , abs(u+cs) );
-        real vdt = cfl * dy         / max( abs(v-cs) , abs(v+cs) );
-        if (sim2d) vdt = std::numeric_limits<real>::max();
-        real wdt = cfl * dz(k,iens) / max( abs(w-cs) , abs(w+cs) );
-
-        // Compute the min of the max stable time steps
-        dt3d(k,j,i,iens) = min( min(udt,vdt) , wdt );
-      });
-      // Store to dtInit so we don't have to compute this again
-      dtInit = yakl::intrinsics::minval( dt3d );
-    }
-
-    return dtInit;
-  }
-
-
-
-  // Initialize crap needed by recon()
-  void init(pam::PamCoupler &coupler) {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    using yakl::intrinsics::matmul_cr;
-
-    this->nens = coupler.get_nens();
-    this->nx   = coupler.get_nx();
-    this->ny   = coupler.get_ny();
-    this->xlen = coupler.get_xlen();
-    this->ylen = coupler.get_ylen();
-    this->num_tracers = coupler.get_num_tracers();
-
-    this->hydrostasis_parameters_sum = 0;
-
-    this->Rd    = coupler.get_option<real>("R_d" );
-    this->cp    = coupler.get_option<real>("cp_d");
-    this->p0    = coupler.get_option<real>("p0"  );
-    this->Rv    = coupler.get_option<real>("R_v" );
-    this->grav  = coupler.get_option<real>("grav");
-    this->gamma = cp / (cp-Rd);
-    real kappa = Rd/cp;
-    this->C0 = pow( Rd * pow( p0 , -kappa ) , gamma );
-
-    // Allocate device arrays for whether tracers are positive-definite or add mass
-    tracer_pos       = bool1d("tracer_pos"      ,num_tracers);
-    tracer_adds_mass = bool1d("tracer_adds_mass",num_tracers);
-    boolHost1d tracer_pos_host      ("tracer_pos_host"      ,num_tracers);
-    boolHost1d tracer_adds_mass_host("tracer_adds_mass_host",num_tracers);
-
-    std::vector<std::string> tracer_names_loc = coupler.get_tracer_names();
-    bool water_vapor_found = false;
-    for (int tr=0; tr < num_tracers; tr++) {
-      bool found, positive, adds_mass;
-      std::string desc;
-      coupler.get_tracer_info( tracer_names_loc[tr] , desc , found , positive , adds_mass );
-      tracer_name.push_back(tracer_names_loc[tr]);
-      tracer_desc.push_back(desc);
-      tracer_pos_host      (tr) = positive ;
-      tracer_adds_mass_host(tr) = adds_mass;
-      if (tracer_names_loc[tr] == std::string("water_vapor")) {
-        idWV = tr;
-        water_vapor_found = true;
-      }
-    }
-    if (! water_vapor_found) endrun("ERROR: processed registered tracers, and water_vapor was not found");
-
-    tracer_pos_host      .deep_copy_to(tracer_pos      );
-    tracer_adds_mass_host.deep_copy_to(tracer_adds_mass);
-    fence();
-
-    // Inialize time step to zero, and dimensional splitting switch
-    dtInit = 0;
-    dimSwitch = true;
-
-    // If inFile is empty, then we aren't reading in an input file
-    if (coupler.option_exists("standalone_input_file")) {
-      #ifdef PAM_STANDALONE
-      std::string inFile = coupler.get_option<std::string>( "standalone_input_file" );
-
-      // Read the YAML input file
-      YAML::Node config = YAML::LoadFile(inFile);
-
-      // Read whether we're doing WENO limiting on scalars and winds
-      weno_scalars = config["weno_scalars"].as<bool>();
-      weno_winds   = config["weno_winds"].as<bool>();
-
-      // Read the data initialization option
-      std::string dataStr = config["initData"].as<std::string>();
-      if        (dataStr == "thermal") {
-        data_spec = DATA_SPEC_THERMAL;
-      } else if (dataStr == "supercell") {
-        data_spec = DATA_SPEC_SUPERCELL;
-      } else if (dataStr == "external") {
-        data_spec = DATA_SPEC_EXTERNAL;
-      } else {
-        endrun("ERROR: Invalid data_spec");
-      }
-      #endif
-    } else {
-      weno_scalars            = true;
-      weno_winds              = true;
-      data_spec               = DATA_SPEC_EXTERNAL;
-    }
-
-    // Determine whether this is a 2-D simulation
-    sim2d = ny == 1;
-
-    // Store vertical cell interface heights in the data manager
-    auto &dm = coupler.get_data_manager_device_readonly();
-    auto zint = dm.get<real const,2>("vertical_interface_height");
-
-    nz = coupler.get_nz();
-
-    // Get the height of the z-dimension
-    zbot = real1d("zbot",nens);
-    ztop = real1d("ztop",nens);
-    YAKL_SCOPE( zbot , this->zbot );
-    YAKL_SCOPE( ztop , this->ztop );
-    YAKL_SCOPE( nz   , this->nz   );
-    parallel_for( "Spatial.h init 1" , nens , YAKL_LAMBDA (int iens) {
-      zbot(iens) = zint(0 ,iens);
-      ztop(iens) = zint(nz,iens);
-    });
-
-    vert_interface        = real2d("vert_interface"      ,nz+1          ,nens);
-    vert_interface_ghost  = real2d("vert_interface_ghost",nz+2*hs+1     ,nens);
-    vert_locs_normalized  = real3d("vert_locs_normalized",nz,ord+1      ,nens);
-    dz                    = real2d("dz"                  ,nz            ,nens);
-    dz_ghost              = real2d("dz_ghost"            ,nz+2*hs       ,nens);
-    vert_sten_to_gll      = real4d("vert_sten_to_gll"     ,nz,ord,ngll,nens);
-    vert_sten_to_coefs    = real4d("vert_sten_to_coefs"   ,nz,ord,ord ,nens);
-    vert_weno_recon_lower = real5d("vert_weno_recon_lower",nz,hs+1,hs+1,hs+1,nens);
-
-    YAKL_SCOPE( vert_interface        , this->vert_interface        );
-    YAKL_SCOPE( vert_interface_ghost  , this->vert_interface_ghost  );
-    YAKL_SCOPE( vert_locs_normalized  , this->vert_locs_normalized  );
-    YAKL_SCOPE( dz                    , this->dz                    );
-    YAKL_SCOPE( dz_ghost              , this->dz_ghost              );
-    YAKL_SCOPE( vert_sten_to_gll      , this->vert_sten_to_gll      );
-    YAKL_SCOPE( vert_sten_to_coefs    , this->vert_sten_to_coefs    );
-    YAKL_SCOPE( vert_weno_recon_lower , this->vert_weno_recon_lower );
-
-    zint.deep_copy_to(vert_interface);
-
-    parallel_for( "Spatial.h init 1" , SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
-      dz(k,iens) = vert_interface(k+1,iens) - vert_interface(k,iens);
-    });
-
-    parallel_for( "Spatial.h init 2" , SimpleBounds<2>(nz+2*hs,nens) , YAKL_LAMBDA (int k, int iens) {
-      if (k >= hs && k < hs+nz) {
-        dz_ghost(k,iens) = dz(k-hs,iens);
-      } else if (k < hs) {
-        dz_ghost(k,iens) = dz(0,iens);
-      } else if (k >= hs+nz) {
-        dz_ghost(k,iens) = dz(nz-1,iens);
-      }
-    });
-
-    parallel_for( "Spatial.h init 3" , nens , YAKL_LAMBDA (int iens) {
-      vert_interface_ghost(0,iens) = vert_interface(0,iens) - hs*dz(0,iens);
-      for (int k=1; k < nz+2*hs+1; k++) {
-        vert_interface_ghost(k,iens) = vert_interface_ghost(k-1,iens) + dz_ghost(k-1,iens);
-      }
-    });
-
-    auto vint_host      = vert_interface_ghost .createHostCopy();
-    auto vert_s2g_host  = vert_sten_to_gll     .createHostCopy();
-    auto vert_s2c_host  = vert_sten_to_coefs   .createHostCopy();
-    auto vert_weno_host = vert_weno_recon_lower.createHostCopy();
-    auto vert_locs_host = vert_locs_normalized .createHostCopy();
-
-    SArray<real,2,ord,ngll> c2g;
-    TransformMatrices::coefs_to_gll_lower(c2g);
-
-    for (int k=0; k < nz; k++) {
-      for (int iens = 0; iens < nens; iens++) {
-        // Store stencil locations
-        SArray<double,1,ord+1> locs;
-        for (int kk=0; kk < ord+1; kk++) {
-          locs(kk) = vint_host(k+kk,iens);
-        }
-
-        // Normalize stencil locations
-        double zmid = ( locs(hs+1) + locs(hs) ) / 2;
-        double dzmid = locs(hs+1) - locs(hs);
-        for (int kk=0; kk < ord+1; kk++) {
-          locs(kk) = ( locs(kk) - zmid ) / dzmid;
-          vert_locs_host(k,kk,iens) = locs(kk);
-        }
-
-        // Compute reconstruction matrices
-        SArray<double,2,ord,ord> s2c_var_in;
-        SArray<double,3,hs+1,hs+1,hs+1> weno_recon_lower_var;
-        TransformMatrices_variable::sten_to_coefs_variable<ord>(locs,s2c_var_in);
-        TransformMatrices_variable::weno_lower_sten_to_coefs<ord>(locs,weno_recon_lower_var);
-        SArray<real,2,ord,ord> s2c_var;
-        for (int jj=0; jj < ord; jj++) {
-          for (int ii=0; ii < ord; ii++) {
-            s2c_var(jj,ii) = s2c_var_in(jj,ii);
-          }
-        }
-        auto s2g_var = matmul_cr( c2g , s2c_var );
-
-        // Store reconstruction matrices
-        for (int jj=0; jj < ord; jj++) {
-          for (int ii=0; ii < ord; ii++) {
-            vert_s2c_host(k,jj,ii,iens) = s2c_var(jj,ii);
-          }
-        }
-
-        for (int jj=0; jj < ord; jj++) {
-          for (int ii=0; ii < ngll; ii++) {
-            vert_s2g_host(k,jj,ii,iens) = s2g_var(jj,ii);
-          }
-        }
-
-        for (int kk=0; kk < hs+1; kk++) {
-          for (int jj=0; jj < hs+1; jj++) {
-            for (int ii=0; ii < hs+1; ii++) {
-              vert_weno_host(k,kk,jj,ii,iens) = weno_recon_lower_var(kk,jj,ii);
-            }
-          }
-        }
-
-      }
-    }
-
-    vert_s2g_host .deep_copy_to(vert_sten_to_gll     );
-    vert_s2c_host .deep_copy_to(vert_sten_to_coefs   );
-    vert_weno_host.deep_copy_to(vert_weno_recon_lower);
-    vert_locs_host.deep_copy_to(vert_locs_normalized );
-
-    // Compute the grid spacing in each dimension
-    dx = xlen/nx;
-    dy = ylen/ny;
-
-    // Store the WENO reconstruction matrices
-    TransformMatrices::weno_lower_sten_to_coefs(this->weno_recon_lower);
-
-    // Block exists to avoid name mangling stufff
-    {
-      SArray<real,2,ord,ord>  s2c;        // Converts ord stencil cell averages to ord coefficients
-      SArray<real,2,ord,ngll> c2g_lower;  // Converts ord coefficients to ngll GLL points
-      SArray<real,2,ord,ord>  c2d;        // Converts ord coefficients to order differentiated coefficients
-
-      TransformMatrices::sten_to_coefs     (s2c      );
-      TransformMatrices::coefs_to_gll_lower(c2g_lower);
-      TransformMatrices::coefs_to_deriv    (c2d      );
-
-      this->coefs_to_gll       = c2g_lower;
-      this->coefs_to_deriv_gll = matmul_cr( c2g_lower , c2d );
-      this->sten_to_coefs      = s2c;
-      this->sten_to_gll        = matmul_cr( c2g_lower , s2c );
-      this->sten_to_deriv_gll  = matmul_cr( c2g_lower , matmul_cr( c2d , s2c ) );
-    }
-    // Store ader derivMatrix
-    {
-      SArray<real,2,ngll,ngll> g2c;  // Converts ngll GLL points to ngll coefficients
-      SArray<real,2,ngll,ngll> c2d;  // Converts ngll coefficients to ngll differentiated coefficients
-      SArray<real,2,ngll,ngll> c2g;  // Converts ngll coefficients to ngll GLL points
-
-      TransformMatrices::gll_to_coefs  (g2c);
-      TransformMatrices::coefs_to_deriv(c2d);
-      TransformMatrices::coefs_to_gll  (c2g);
-
-      this->derivMatrix = matmul_cr( c2g , matmul_cr( c2d , g2c ) );
-    }
-    // Store quadrature weights using ord GLL points
-    TransformMatrices::get_gll_points (this->gllPts_ord);
-    TransformMatrices::get_gll_weights(this->gllWts_ord);
-    // Store quadrature weights using ngll GLL points
-    TransformMatrices::get_gll_points (this->gllPts_ngll);
-    TransformMatrices::get_gll_weights(this->gllWts_ngll);
-
-    // Store WENO ideal weights and sigma value
-    weno::wenoSetIdealSigma<ord>(this->idl,this->sigma);
-
-    // Allocate data
-    hyDensSten      = real3d("hyDensSten       ",nz,ord,nens);
-    hyDensThetaSten = real3d("hyDensThetaSten  ",nz,ord,nens);
-    hyDensGLL       = real3d("hyDensGLL        ",nz,ngll,nens);
-    hyPressureGLL   = real3d("hyPressureGLL    ",nz,ngll,nens);
-    hyDensThetaGLL  = real3d("hyDensThetaGLL   ",nz,ngll,nens);
-
-    init_idealized_state_and_tracers( coupler );
-
-  }
-
-
-
-  // Initialize the state
-  void init_idealized_state_and_tracers( pam::PamCoupler &coupler ) {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    YAKL_SCOPE( nx                       , this->nx                      );
-    YAKL_SCOPE( ny                       , this->ny                      );
-    YAKL_SCOPE( nz                       , this->nz                      );
-    YAKL_SCOPE( dx                       , this->dx                      );
-    YAKL_SCOPE( dy                       , this->dy                      );
-    YAKL_SCOPE( dz                       , this->dz                      );
-    YAKL_SCOPE( gllPts_ord               , this->gllPts_ord              );
-    YAKL_SCOPE( gllWts_ord               , this->gllWts_ord              );
-    YAKL_SCOPE( gllPts_ngll              , this->gllPts_ngll             );
-    YAKL_SCOPE( gllWts_ngll              , this->gllWts_ngll             );
-    YAKL_SCOPE( data_spec                , this->data_spec               );
-    YAKL_SCOPE( sim2d                    , this->sim2d                   );
-    YAKL_SCOPE( xlen                     , this->xlen                    );
-    YAKL_SCOPE( ylen                     , this->ylen                    );
-    YAKL_SCOPE( Rd                       , this->Rd                      );
-    YAKL_SCOPE( Rv                       , this->Rv                      );
-    YAKL_SCOPE( cp                       , this->cp                      );
-    YAKL_SCOPE( gamma                    , this->gamma                   );
-    YAKL_SCOPE( p0                       , this->p0                      );
-    YAKL_SCOPE( C0                       , this->C0                      );
-    YAKL_SCOPE( vert_interface           , this->vert_interface          );
-    YAKL_SCOPE( num_tracers              , this->num_tracers             );
-    YAKL_SCOPE( idWV                     , this->idWV                    );
-    YAKL_SCOPE( grav                     , this->grav                    );
-
-    // If data's being specified by the driver externally, then there's nothing to do here
-    if (data_spec == DATA_SPEC_EXTERNAL) return;
-
-    real5d state   = createStateArr();
-    real5d tracers = createTracerArr();
-
-    // If the data_spec is thermal or ..., then initialize the domain with Exner pressure-based hydrostasis
-    // This is mostly to make plotting potential temperature perturbation easier for publications
-    if (data_spec == DATA_SPEC_THERMAL) {
-
-      // Compute the state
-      parallel_for( "Spatial.h init_state 3" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                    YAKL_LAMBDA (int k, int j, int i, int iens) {
-        state(idR,hs+k,hs+j,hs+i,iens) = 0;
-        state(idU,hs+k,hs+j,hs+i,iens) = 0;
-        state(idV,hs+k,hs+j,hs+i,iens) = 0;
-        state(idW,hs+k,hs+j,hs+i,iens) = 0;
-        state(idT,hs+k,hs+j,hs+i,iens) = 0;
-        for (int kk=0; kk<ord; kk++) {
-          for (int jj=0; jj<ord; jj++) {
-            for (int ii=0; ii<ord; ii++) {
-              real zloc = vert_interface(k,iens) + 0.5_fp*dz(k,iens) + gllPts_ord(kk)*dz(k,iens);
-              real yloc;
-              if (sim2d) {
-                yloc = ylen/2;
-              } else {
-                yloc = (j+0.5_fp)*dy + gllPts_ord(jj)*dy;
-              }
-              real xloc = (i+0.5_fp)*dx + gllPts_ord(ii)*dx;
-              real wt = gllWts_ord(kk) * gllWts_ord(jj) * gllWts_ord(ii);
-              if        (data_spec == DATA_SPEC_THERMAL) {
-                // Compute constant theta hydrostatic background state
-                real th = 300;
-                real rh = profiles::initConstTheta_density(th,zloc,Rd,cp,gamma,p0,C0,grav);
-                real tp = profiles::ellipsoid_linear(xloc, yloc, zloc, xlen/2, ylen/2, 2000, 2000, 2000, 2000, 2 );
-                real t = th + tp;
-                real r = rh;
-
-                state(idR,hs+k,hs+j,hs+i,iens) += r   * wt;
-                state(idT,hs+k,hs+j,hs+i,iens) += r*t * wt;
-              }
-            }
-          }
-        }
-      });
-
-      parallel_for( "Spatial.h init_tracers" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                    YAKL_LAMBDA (int k, int j, int i, int iens) {
-        for (int tr=0; tr < num_tracers; tr++) { tracers(tr,hs+k,hs+j,hs+i,iens) = 0; }
-        // Loop over quadrature points
-        for (int kk=0; kk<ord; kk++) {
-          for (int jj=0; jj<ord; jj++) {
-            for (int ii=0; ii<ord; ii++) {
-              // Get the location
-              real zloc = vert_interface(k,iens) + 0.5_fp*dz(k,iens) + gllPts_ord(kk)*dz(k,iens);
-              real yloc;
-              if (sim2d) {
-                yloc = ylen/2;
-              } else {
-                yloc = (j+0.5_fp)*dy + gllPts_ord(jj)*dy;
-              }
-              real xloc = (i+0.5_fp)*dx + gllPts_ord(ii)*dx;
-
-              // Get dry constants
-
-              // Compute constant theta hydrostatic background state
-              real th = 300;
-              real rh = profiles::initConstTheta_density(th,zloc,Rd,cp,gamma,p0,C0,grav);
-
-              // Initialize tracer mass based on dry state
-              // Vapor perturbation profile
-              real pert  = profiles::ellipsoid_linear(xloc,yloc,zloc  ,  xlen/2,ylen/2,2000  ,  2000,2000,2000  ,  0.8);
-              real press = C0*pow(rh*th,gamma);                       // Dry pressure
-              real temp  = press / Rd / rh;                           // Temperator (same for dry and moist)
-              real svp   = profiles::saturation_vapor_pressure(temp); // Self-explanatory
-              real p_v   = pert*svp;                                  // Multiply profile by saturation vapor pressure
-              real r_v   = p_v / (Rv*temp);                           // Compute vapor density
-
-              real wt = gllWts_ord(kk) * gllWts_ord(jj) * gllWts_ord(ii);
-              tracers(idWV,hs+k,hs+j,hs+i,iens) += r_v / (rh+r_v) * rh * wt;
-              for (int tr=0; tr < num_tracers; tr++) {
-                if (tr != idWV) tracers(tr,hs+k,hs+j,hs+i,iens) = 0;
-              }
-            }
-          }
-        }
-      });
-
-      parallel_for( "Spatial.h adjust_moisture" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                    YAKL_LAMBDA (int k, int j, int i, int iens) {
-        // Add tracer density to dry density if it adds mass
-        real rho_dry = state(idR,hs+k,hs+j,hs+i,iens);
-        state(idR,hs+k,hs+j,hs+i,iens) += tracers(idWV,hs+k,hs+j,hs+i,iens);
-        real rho_moist = state(idR,hs+k,hs+j,hs+i,iens);
-
-        // Adjust momenta for moist density
-        state(idU,hs+k,hs+j,hs+i,iens) = state(idU,hs+k,hs+j,hs+i,iens) / rho_dry * rho_moist;
-        state(idV,hs+k,hs+j,hs+i,iens) = state(idV,hs+k,hs+j,hs+i,iens) / rho_dry * rho_moist;
-        state(idW,hs+k,hs+j,hs+i,iens) = state(idW,hs+k,hs+j,hs+i,iens) / rho_dry * rho_moist;
-
-        // Compute the dry temperature (same as the moist temperature)
-        real rho_theta_dry = state(idT,hs+k,hs+j,hs+i,iens);
-        real press = C0*pow(rho_theta_dry,gamma);  // Dry pressure
-        real temp  = press / Rd / rho_dry;         // Temp (same dry or moist)
-
-        // Compute moist theta
-        real rho_v = tracers(idWV,hs+k,hs+j,hs+i,iens);
-        real R_moist = Rd * (rho_dry / rho_moist) + Rv * (rho_v / rho_moist);
-        real press_moist = rho_moist * R_moist * temp;
-        real rho_theta_moist = pow( press_moist / C0 , 1._fp/gamma );
-
-        // Compute moist rho*theta
-        state(idT,hs+k,hs+j,hs+i,iens) = rho_theta_moist;
-
-        for (int tr = 0 ; tr < num_tracers ; tr++) {
-          tracers(tr,hs+k,hs+j,hs+i,iens) = tracers(tr,hs+k,hs+j,hs+i,iens) / rho_dry * rho_moist;
-        }
-      });
-
-    } // if (data_spec == DATA_SPEC_THERMAL)
-
-    convert_dynamics_to_coupler_state( coupler , state , tracers );
-  }
-
-
-
-  // Compute state and tendency time derivatives from the state
-  void computeTendencies( real5d const &state   , real5d const &stateTend  ,
-                          real5d const &tracers , real5d const &tracerTend ,
-                          real &dt , int splitIndex ) const {
-    if (sim2d) {
-      if (dimSwitch) {
-        if        (splitIndex == 0) {
-          computeTendenciesX( state , stateTend , tracers , tracerTend , dt );
-        } else if (splitIndex == 1) {
-          computeTendenciesZ( state , stateTend , tracers , tracerTend , dt );
-        }
-      } else {
-        if        (splitIndex == 0) {
-          computeTendenciesZ( state , stateTend , tracers , tracerTend , dt );
-        } else if (splitIndex == 1) {
-          computeTendenciesX( state , stateTend , tracers , tracerTend , dt );
-        }
-      }
-    } else {
-      if (dimSwitch) {
-        if        (splitIndex == 0) {
-          computeTendenciesX( state , stateTend , tracers , tracerTend , dt );
-        } else if (splitIndex == 1) {
-          computeTendenciesY( state , stateTend , tracers , tracerTend , dt );
-        } else if (splitIndex == 2) {
-          computeTendenciesZ( state , stateTend , tracers , tracerTend , dt );
-        }
-      } else {
-        if        (splitIndex == 0) {
-          computeTendenciesZ( state , stateTend , tracers , tracerTend , dt );
-        } else if (splitIndex == 1) {
-          computeTendenciesY( state , stateTend , tracers , tracerTend , dt );
-        } else if (splitIndex == 2) {
-          computeTendenciesX( state , stateTend , tracers , tracerTend , dt );
-        }
-      }
-    }
-  } // computeTendencies
-
-
-
-  void switch_directions() {
-    dimSwitch = ! dimSwitch;
-  }
-
-
-
-  YAKL_INLINE static int wrapx(int i, int ii, int nx) {
-    int ret = i+ii;
-    if (ret < hs+0   ) ret += nx;
-    if (ret > hs+nx-1) ret -= nx;
-    return ret;
-  }
-
-
-
-  void computeTendenciesX( real5d const &state   , real5d const &stateTend  ,
-                           real5d const &tracers , real5d const &tracerTend ,
-                           real &dt ) const {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    YAKL_SCOPE( nx                      , this->nx                     );
-    YAKL_SCOPE( weno_scalars            , this->weno_scalars           );
-    YAKL_SCOPE( weno_winds              , this->weno_winds             );
-    YAKL_SCOPE( c2g                     , this->coefs_to_gll           );
-    YAKL_SCOPE( s2g                     , this->sten_to_gll            );
-    YAKL_SCOPE( s2c                     , this->sten_to_coefs          );
-    YAKL_SCOPE( weno_recon_lower        , this->weno_recon_lower       );
-    YAKL_SCOPE( idl                     , this->idl                    );
-    YAKL_SCOPE( sigma                   , this->sigma                  );
-    YAKL_SCOPE( sim2d                   , this->sim2d                  );
-    YAKL_SCOPE( derivMatrix             , this->derivMatrix            );
-    YAKL_SCOPE( dx                      , this->dx                     );
-    YAKL_SCOPE( tracer_pos              , this->tracer_pos             );
-    YAKL_SCOPE( num_tracers             , this->num_tracers            );
-    YAKL_SCOPE( gamma                   , this->gamma                  );
-    YAKL_SCOPE( C0                      , this->C0                     );
-
-    real6d stateLimits ("stateLimits" ,num_state  ,2,nz,ny,nx+1,nens);
-    real6d tracerLimits("tracerLimits",num_tracers,2,nz,ny,nx+1,nens);
-
-    // Loop through all cells, reconstruct in x-direction, compute centered tendencies, store cell-edge state estimates
-    parallel_for( "Spatial.h X recon" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                  YAKL_LAMBDA (int k, int j, int i, int iens) {
-      SArray<real,2,nAder,ngll> r_DTs , ru_DTs;
-
-      { // State
-        SArray<real,2,nAder,ngll> rv_DTs , rw_DTs , rt_DTs, ruu_DTs , ruv_DTs , ruw_DTs , rut_DTs , rt_gamma_DTs;
-
-        { // Recon
-          SArray<real,1,ord>  stencil;
-          SArray<real,1,ngll> gll;
-
-          // Density
-          for (int ii=0; ii < ord; ii++) { stencil(ii) = state(idR,hs+k,hs+j,wrapx(i,ii,nx),iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_scalars );
-          // Add hydrostasis back on
-          for (int ii=0; ii < ngll; ii++) { r_DTs(0,ii) = gll(ii); }
-
-          // u
-          for (int ii=0; ii < ord; ii++) { stencil(ii) = state(idU,hs+k,hs+j,wrapx(i,ii,nx),iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_winds );
-          for (int ii=0; ii < ngll; ii++) { ru_DTs(0,ii) = gll(ii); }
-
-          // v
-          for (int ii=0; ii < ord; ii++) { stencil(ii) = state(idV,hs+k,hs+j,wrapx(i,ii,nx),iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_winds );
-          for (int ii=0; ii < ngll; ii++) { rv_DTs(0,ii) = gll(ii); }
-
-          // w
-          for (int ii=0; ii < ord; ii++) { stencil(ii) = state(idW,hs+k,hs+j,wrapx(i,ii,nx),iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_winds );
-          for (int ii=0; ii < ngll; ii++) { rw_DTs(0,ii) = gll(ii); }
-
-          // theta
-          for (int ii=0; ii < ord; ii++) { stencil(ii) = state(idT,hs+k,hs+j,wrapx(i,ii,nx),iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_scalars );
-          // Add hydrostasis back on
-          for (int ii=0; ii < ngll; ii++) { rt_DTs(0,ii) = gll(ii); }
-        }
-
-        for (int ii=0; ii < ngll; ii++) {
-          real r = r_DTs (0,ii);
-          real u = ru_DTs(0,ii) / r;
-          real v = rv_DTs(0,ii) / r;
-          real w = rw_DTs(0,ii) / r;
-          real t = rt_DTs(0,ii) / r;
-          ruu_DTs     (0,ii) = r*u*u;
-          ruv_DTs     (0,ii) = r*u*v;
-          ruw_DTs     (0,ii) = r*u*w;
-          rut_DTs     (0,ii) = r*u*t;
-          rt_gamma_DTs(0,ii) = pow(r*t,gamma);
-        }
-
-        if (nAder > 1) {
-          diffTransformEulerConsX( r_DTs , ru_DTs , rv_DTs , rw_DTs , rt_DTs , ruu_DTs , ruv_DTs , ruw_DTs ,
-                                   rut_DTs , rt_gamma_DTs , derivMatrix , C0 , gamma , dx );
-        }
-
-        SArray<real,1,ngll> r_tavg, ru_tavg;
-        if (timeAvg) {
-          compute_timeAvg( r_DTs  , r_tavg  , dt );
-          compute_timeAvg( ru_DTs , ru_tavg , dt );
-          compute_timeAvg( rv_DTs           , dt );
-          compute_timeAvg( rw_DTs           , dt );
-          compute_timeAvg( rt_DTs           , dt );
-        } else {
-          for (int ii=0; ii < ngll; ii++) {
-            r_tavg (ii) = r_DTs (0,ii);
-            ru_tavg(ii) = ru_DTs(0,ii);
-          }
-        }
-
-        // Left interface
-        stateLimits(idR,1,k,j,i  ,iens) = r_tavg  (0     );
-        stateLimits(idU,1,k,j,i  ,iens) = ru_tavg (0     );
-        stateLimits(idV,1,k,j,i  ,iens) = rv_DTs(0,0     );
-        stateLimits(idW,1,k,j,i  ,iens) = rw_DTs(0,0     );
-        stateLimits(idT,1,k,j,i  ,iens) = rt_DTs(0,0     );
-        // Right interface
-        stateLimits(idR,0,k,j,i+1,iens) = r_tavg  (ngll-1);
-        stateLimits(idU,0,k,j,i+1,iens) = ru_tavg (ngll-1);
-        stateLimits(idV,0,k,j,i+1,iens) = rv_DTs(0,ngll-1);
-        stateLimits(idW,0,k,j,i+1,iens) = rw_DTs(0,ngll-1);
-        stateLimits(idT,0,k,j,i+1,iens) = rt_DTs(0,ngll-1);
-      }
-
-      { // Tracers
-        for (int tr=0; tr < num_tracers; tr++) {
-          SArray<real,2,nAder,ngll> rt_DTs, rut_DTs;
-
-          { // Recon
-            SArray<real,1,ord>  stencil;
-            SArray<real,1,ngll> gll;
-
-            for (int ii=0; ii < ord; ii++) { stencil(ii) = tracers(tr,hs+k,hs+j,wrapx(i,ii,nx),iens); }
-            reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_scalars );
-            if (tracer_pos(tr)) {
-              for (int ii=0; ii < ngll; ii++) { gll(ii) = max( 0._fp , gll(ii) ); }
-            }
-            for (int ii=0; ii < ngll; ii++) { rt_DTs(0,ii) = gll(ii); }
-          }
-
-          for (int ii=0; ii < ngll; ii++) {
-            rut_DTs(0,ii) = rt_DTs(0,ii) * ru_DTs(0,ii) / r_DTs(0,ii);
-          }
-
-          if (nAder > 1) {
-            diffTransformTracer( r_DTs , ru_DTs , rt_DTs , rut_DTs , derivMatrix , dx );
-          }
-
-          if (timeAvg) {
-            compute_timeAvg( rt_DTs  , dt );
-          }
-
-          if (tracer_pos(tr)) {
-            for (int ii=0; ii < ngll; ii++) { rt_DTs(0,ii) = max( 0._fp , rt_DTs(0,ii) ); }
-          }
-
-          tracerLimits(tr,1,k,j,i  ,iens) = rt_DTs(0,0     ); // Left interface
-          tracerLimits(tr,0,k,j,i+1,iens) = rt_DTs(0,ngll-1); // Right interface
-        }
-      }
-    });
-
-    real5d state_flux ("state_flux" ,num_state  ,nz,ny,nx+1,nens);
-    real5d tracer_flux("tracer_flux",num_tracers,nz,ny,nx+1,nens);
-
-    //////////////////////////////////////////////////////////
-    // Compute the upwind fluxes
-    //////////////////////////////////////////////////////////
-    parallel_for( "Spatial.h X Riemann" , SimpleBounds<4>(nz,ny,nx+1,nens) ,
-                  YAKL_LAMBDA (int k, int j, int i, int iens) {
-      if (i == 0 ) {
-        for (int l=0; l < num_state  ; l++) { stateLimits (l,0,k,j,0 ,iens) = stateLimits (l,0,k,j,nx,iens); }
-        for (int l=0; l < num_tracers; l++) { tracerLimits(l,0,k,j,0 ,iens) = tracerLimits(l,0,k,j,nx,iens); }
-      }
-      if (i == nx) {
-        for (int l=0; l < num_state  ; l++) { stateLimits (l,1,k,j,nx,iens) = stateLimits (l,1,k,j,0 ,iens); }
-        for (int l=0; l < num_tracers; l++) { tracerLimits(l,1,k,j,nx,iens) = tracerLimits(l,1,k,j,0 ,iens); }
-      }
-      // Get left and right state
-      real r_L = stateLimits(idR,0,k,j,i,iens)    ;   real r_R = stateLimits(idR,1,k,j,i,iens)    ;
-      real u_L = stateLimits(idU,0,k,j,i,iens)/r_L;   real u_R = stateLimits(idU,1,k,j,i,iens)/r_R;
-      real v_L = stateLimits(idV,0,k,j,i,iens)/r_L;   real v_R = stateLimits(idV,1,k,j,i,iens)/r_R;
-      real w_L = stateLimits(idW,0,k,j,i,iens)/r_L;   real w_R = stateLimits(idW,1,k,j,i,iens)/r_R;
-      real t_L = stateLimits(idT,0,k,j,i,iens)/r_L;   real t_R = stateLimits(idT,1,k,j,i,iens)/r_R;
-      // Compute average state
-      real r = 0.5_fp * (r_L + r_R);
-      real u = 0.5_fp * (u_L + u_R);
-      real v = 0.5_fp * (v_L + v_R);
-      real w = 0.5_fp * (w_L + w_R);
-      real t = 0.5_fp * (t_L + t_R);
-      real p = C0 * pow(r*t,gamma);
-      real cs2 = gamma*p/r;
-      real cs  = sqrt(cs2);
-
-      // COMPUTE UPWIND STATE FLUXES
-      // Get left and right fluxes
-      real q1_L = stateLimits(idR,0,k,j,i,iens);   real q1_R = stateLimits(idR,1,k,j,i,iens);
-      real q2_L = stateLimits(idU,0,k,j,i,iens);   real q2_R = stateLimits(idU,1,k,j,i,iens);
-      real q3_L = stateLimits(idV,0,k,j,i,iens);   real q3_R = stateLimits(idV,1,k,j,i,iens);
-      real q4_L = stateLimits(idW,0,k,j,i,iens);   real q4_R = stateLimits(idW,1,k,j,i,iens);
-      real q5_L = stateLimits(idT,0,k,j,i,iens);   real q5_R = stateLimits(idT,1,k,j,i,iens);
-      // Compute upwind characteristics
-      // Waves 1-3, velocity: u
-      real w1, w2, w3;
-      if (u > 0) {
-        w1 = q1_L - q5_L/t;
-        w2 = q3_L - v*q5_L/t;
-        w3 = q4_L - w*q5_L/t;
-      } else {
-        w1 = q1_R - q5_R/t;
-        w2 = q3_R - v*q5_R/t;
-        w3 = q4_R - w*q5_R/t;
-      }
-      // Wave 5, velocity: u-cs
-      real w5 =  u*q1_R/(2*cs) - q2_R/(2*cs) + q5_R/(2*t);
-      // Wave 6, velocity: u+cs
-      real w6 = -u*q1_L/(2*cs) + q2_L/(2*cs) + q5_L/(2*t);
-      // Use right eigenmatrix to compute upwind flux
-      real q1 = w1 + w5 + w6;
-      real q2 = u*w1 + (u-cs)*w5 + (u+cs)*w6;
-      real q3 = w2 + v*w5 + v*w6;
-      real q4 = w3 + w*w5 + w*w6;
-      real q5 =      t*w5 + t*w6;
-
-      state_flux(idR,k,j,i,iens) = q2;
-      state_flux(idU,k,j,i,iens) = q2*q2/q1 + C0*pow(q5,gamma);
-      state_flux(idV,k,j,i,iens) = q2*q3/q1;
-      state_flux(idW,k,j,i,iens) = q2*q4/q1;
-      state_flux(idT,k,j,i,iens) = q2*q5/q1;
-
-      // COMPUTE UPWIND TRACER FLUXES
-      // Handle it one tracer at a time
-      for (int tr=0; tr < num_tracers; tr++) {
-        if (u > 0) {
-          tracer_flux(tr,k,j,i,iens) = q2 * tracerLimits(tr,0,k,j,i,iens) / r_L;
-        } else {
-          tracer_flux(tr,k,j,i,iens) = q2 * tracerLimits(tr,1,k,j,i,iens) / r_R;
-        }
-      }
-    });
-
-    stateLimits  = real6d();
-    tracerLimits = real6d();
-
-    //////////////////////////////////////////////////////////
-    // Limit the tracer fluxes for positivity
-    //////////////////////////////////////////////////////////
-    real5d fct_mult("fct_mult",num_tracers,nz,ny,nx+1,nens);
-    parallel_for( "Spatial.h X FCT" , SimpleBounds<5>(num_tracers,nz,ny,nx+1,nens) ,
-                  YAKL_LAMBDA (int tr, int k, int j, int i, int iens) {
-      fct_mult(tr,k,j,i,iens) = 1.;
-      // Solid wall BCs mean u == 0 at boundaries, so we assume periodic if u != 0
-      if (tracer_pos(tr)) {
-        // Compute and apply the flux reduction factor of the upwind cell
-        if      (tracer_flux(tr,k,j,i,iens) > 0) {
-          // if u > 0, then it pulls mass out of the left cell
-          int ind_i = i-1;
-          // TODO: Relax the periodic assumption here
-          if (ind_i == -1) ind_i = nx-1;
-          real f1 = min( tracer_flux(tr,k,j,ind_i  ,iens) , 0._fp );
-          real f2 = max( tracer_flux(tr,k,j,ind_i+1,iens) , 0._fp );
-          real fluxOut = dt*(f2-f1)/dx;
-          real mass = tracers(tr,hs+k,hs+j,hs+ind_i,iens);
-          if (fluxOut > 0) {
-            fct_mult(tr,k,j,i,iens) = min( 1._fp , mass / fluxOut );
-          }
-        } else if (tracer_flux(tr,k,j,i,iens) < 0) {
-          // upwind is to the right of this interface
-          int ind_i = i;
-          // TODO: Relax the periodic assumption here
-          if (ind_i == nx) ind_i = 0;
-          real f1 = min( tracer_flux(tr,k,j,ind_i  ,iens) , 0._fp );
-          real f2 = max( tracer_flux(tr,k,j,ind_i+1,iens) , 0._fp );
-          real fluxOut = dt*(f2-f1)/dx;
-          real mass = tracers(tr,hs+k,hs+j,hs+ind_i,iens);
-          if (fluxOut > 0) {
-            fct_mult(tr,k,j,i,iens) = min( 1._fp , mass / fluxOut );
-          }
-        }
-      }
-    });
-
-    //////////////////////////////////////////////////////////
-    // Compute the tendencies
-    //////////////////////////////////////////////////////////
-    parallel_for( "Spatial.h X tendencies" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                  YAKL_LAMBDA(int k, int j, int i, int iens) {
-      for (int l = 0; l < num_state; l++) {
-        if (sim2d && l == idV) {
-          stateTend(l,k,j,i,iens) = 0;
-        } else {
-          stateTend(l,k,j,i,iens) = - ( state_flux(l,k,j,i+1,iens) - state_flux(l,k,j,i,iens) ) / dx;
-        }
-      }
-      for (int l = 0; l < num_tracers; l++) {
-        // Compute tracer tendency
-        tracerTend(l,k,j,i,iens) = - ( tracer_flux(l,k,j,i+1,iens)*fct_mult(l,k,j,i+1,iens) -
-                                       tracer_flux(l,k,j,i  ,iens)*fct_mult(l,k,j,i  ,iens) ) / dx;
-      }
-    });
-  }
-
-
-
-  YAKL_INLINE static int wrapy(int j, int jj, int ny) {
-    int ret = j+jj;
-    if (ret < hs+0   ) ret += ny;
-    if (ret > hs+ny-1) ret -= ny;
-    return ret;
-  }
-
-
-
-  void computeTendenciesY( real5d const &state   , real5d const &stateTend  ,
-                           real5d const &tracers , real5d const &tracerTend ,
-                           real &dt ) const {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    YAKL_SCOPE( ny                      , this->ny                     );
-    YAKL_SCOPE( weno_scalars            , this->weno_scalars           );
-    YAKL_SCOPE( weno_winds              , this->weno_winds             );
-    YAKL_SCOPE( c2g                     , this->coefs_to_gll           );
-    YAKL_SCOPE( s2g                     , this->sten_to_gll            );
-    YAKL_SCOPE( s2c                     , this->sten_to_coefs          );
-    YAKL_SCOPE( weno_recon_lower        , this->weno_recon_lower       );
-    YAKL_SCOPE( idl                     , this->idl                    );
-    YAKL_SCOPE( sigma                   , this->sigma                  );
-
-    YAKL_SCOPE( derivMatrix             , this->derivMatrix            );
-    YAKL_SCOPE( dy                      , this->dy                     );
-    YAKL_SCOPE( tracer_pos              , this->tracer_pos             );
-    YAKL_SCOPE( num_tracers             , this->num_tracers            );
-    YAKL_SCOPE( gamma                   , this->gamma                  );
-    YAKL_SCOPE( C0                      , this->C0                     );
-
-    real6d stateLimits ("stateLimits" ,num_state  ,2,nz,ny+1,nx,nens);
-    real6d tracerLimits("tracerLimits",num_tracers,2,nz,ny+1,nx,nens);
-
-    // Loop through all cells, reconstruct in y-direction, compute centered tendencies, store cell-edge state estimates
-    parallel_for( "Spatial.h Y recon" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                  YAKL_LAMBDA (int k, int j, int i, int iens) {
-      SArray<real,2,nAder,ngll> r_DTs , rv_DTs;
-
-      { // State
-        SArray<real,2,nAder,ngll> ru_DTs , rw_DTs , rt_DTs, rvu_DTs , rvv_DTs , rvw_DTs , rvt_DTs , rt_gamma_DTs;
-
-        { // Recon
-          SArray<real,1,ord>  stencil;
-          SArray<real,1,ngll> gll;
-
-          // Density
-          for (int jj=0; jj < ord; jj++) { stencil(jj) = state(idR,hs+k,wrapy(j,jj,ny),hs+i,iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_scalars );
-          // Add hydrostasis back on
-          for (int jj=0; jj < ngll; jj++) { r_DTs(0,jj) = gll(jj); }
-
-          // u
-          for (int jj=0; jj < ord; jj++) { stencil(jj) = state(idU,hs+k,wrapy(j,jj,ny),hs+i,iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_winds );
-          for (int jj=0; jj < ngll; jj++) { ru_DTs(0,jj) = gll(jj); }
-
-          // v
-          for (int jj=0; jj < ord; jj++) { stencil(jj) = state(idV,hs+k,wrapy(j,jj,ny),hs+i,iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_winds );
-          for (int jj=0; jj < ngll; jj++) { rv_DTs(0,jj) = gll(jj); }
-
-          // w
-          for (int jj=0; jj < ord; jj++) { stencil(jj) = state(idW,hs+k,wrapy(j,jj,ny),hs+i,iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_winds );
-          for (int jj=0; jj < ngll; jj++) { rw_DTs(0,jj) = gll(jj); }
-
-          // theta
-          for (int jj=0; jj < ord; jj++) { stencil(jj) = state(idT,hs+k,wrapy(j,jj,ny),hs+i,iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_scalars );
-          // Add hydrostasis back on
-          for (int jj=0; jj < ngll; jj++) { rt_DTs(0,jj) = gll(jj); }
-        }
-
-        for (int jj=0; jj < ngll; jj++) {
-          real r = r_DTs (0,jj);
-          real u = ru_DTs(0,jj) / r;
-          real v = rv_DTs(0,jj) / r;
-          real w = rw_DTs(0,jj) / r;
-          real t = rt_DTs(0,jj) / r;
-          rvu_DTs     (0,jj) = r*v*u;
-          rvv_DTs     (0,jj) = r*v*v;
-          rvw_DTs     (0,jj) = r*v*w;
-          rvt_DTs     (0,jj) = r*v*t;
-          rt_gamma_DTs(0,jj) = pow(r*t,gamma);
-        }
-
-        if (nAder > 1) {
-          diffTransformEulerConsY( r_DTs , ru_DTs , rv_DTs , rw_DTs , rt_DTs , rvu_DTs , rvv_DTs , rvw_DTs ,
-                                   rvt_DTs , rt_gamma_DTs , derivMatrix , C0 , gamma , dy );
-        }
-
-        SArray<real,1,ngll> r_tavg, rv_tavg;
-        if (timeAvg) {
-          compute_timeAvg( r_DTs  , r_tavg  , dt );
-          compute_timeAvg( ru_DTs           , dt );
-          compute_timeAvg( rv_DTs , rv_tavg , dt );
-          compute_timeAvg( rw_DTs           , dt );
-          compute_timeAvg( rt_DTs           , dt );
-        } else {
-          for (int jj=0; jj < ngll; jj++) {
-            r_tavg (jj) = r_DTs (0,jj);
-            rv_tavg(jj) = rv_DTs(0,jj);
-          }
-        }
-
-        // Left interface
-        stateLimits(idR,1,k,j  ,i,iens) = r_tavg  (0     );
-        stateLimits(idU,1,k,j  ,i,iens) = ru_DTs(0,0     );
-        stateLimits(idV,1,k,j  ,i,iens) = rv_tavg (0     );
-        stateLimits(idW,1,k,j  ,i,iens) = rw_DTs(0,0     );
-        stateLimits(idT,1,k,j  ,i,iens) = rt_DTs(0,0     );
-        // Right interface
-        stateLimits(idR,0,k,j+1,i,iens) = r_tavg  (ngll-1);
-        stateLimits(idU,0,k,j+1,i,iens) = ru_DTs(0,ngll-1);
-        stateLimits(idV,0,k,j+1,i,iens) = rv_tavg (ngll-1);
-        stateLimits(idW,0,k,j+1,i,iens) = rw_DTs(0,ngll-1);
-        stateLimits(idT,0,k,j+1,i,iens) = rt_DTs(0,ngll-1);
-      }
-
-      { // Tracers
-        for (int tr=0; tr < num_tracers; tr++) {
-          SArray<real,2,nAder,ngll> rt_DTs, rvt_DTs;
-          
-          { // Recon
-            SArray<real,1,ord>  stencil;
-            SArray<real,1,ngll> gll;
-
-            for (int jj=0; jj < ord; jj++) { stencil(jj) = tracers(tr,hs+k,wrapy(j,jj,ny),hs+i,iens); }
-            reconstruct_gll_values( stencil , gll , c2g , s2g , s2c , weno_recon_lower , idl , sigma , weno_scalars );
-            if (tracer_pos(tr)) {
-              for (int jj=0; jj < ngll; jj++) { gll(jj) = max( 0._fp , gll(jj) ); }
-            }
-            for (int jj=0; jj < ngll; jj++) { rt_DTs(0,jj) = gll(jj); }
-          }
-
-          for (int jj=0; jj < ngll; jj++) {
-            rvt_DTs(0,jj) = rt_DTs(0,jj) * rv_DTs(0,jj) / r_DTs(0,jj);
-          }
-
-          if (nAder > 1) {
-            diffTransformTracer( r_DTs , rv_DTs , rt_DTs , rvt_DTs , derivMatrix , dy );
-          }
-
-          if (timeAvg) {
-            compute_timeAvg( rt_DTs  , dt );
-          }
-
-          if (tracer_pos(tr)) {
-            for (int jj=0; jj < ngll; jj++) { rt_DTs(0,jj) = max( 0._fp , rt_DTs(0,jj) ); }
-          }
-
-          tracerLimits(tr,1,k,j  ,i,iens) = rt_DTs (0,0     ); // Left interface
-          tracerLimits(tr,0,k,j+1,i,iens) = rt_DTs (0,ngll-1); // Right interface
-        }
-      }
-    });
-
-    real5d state_flux ("state_flux" ,num_state  ,nz,ny+1,nx,nens);
-    real5d tracer_flux("tracer_flux",num_tracers,nz,ny+1,nx,nens);
-
-    //////////////////////////////////////////////////////////
-    // Compute the upwind fluxes
-    //////////////////////////////////////////////////////////
-    parallel_for( "Spatial.h Y Riemann" , SimpleBounds<4>(nz,ny+1,nx,nens) ,
-                  YAKL_LAMBDA (int k, int j, int i, int iens) {
-      if (j == 0 ) {
-        for (int l=0; l < num_state  ; l++) { stateLimits (l,0,k,0 ,i,iens) = stateLimits (l,0,k,ny,i,iens); }
-        for (int l=0; l < num_tracers; l++) { tracerLimits(l,0,k,0 ,i,iens) = tracerLimits(l,0,k,ny,i,iens); }
-      }
-      if (j == ny) {
-        for (int l=0; l < num_state  ; l++) { stateLimits (l,1,k,ny,i,iens) = stateLimits (l,1,k,0 ,i,iens); }
-        for (int l=0; l < num_tracers; l++) { tracerLimits(l,1,k,ny,i,iens) = tracerLimits(l,1,k,0 ,i,iens); }
-      }
-      // Get left and right state
-      real r_L = stateLimits(idR,0,k,j,i,iens)    ;   real r_R = stateLimits(idR,1,k,j,i,iens)    ;
-      real u_L = stateLimits(idU,0,k,j,i,iens)/r_L;   real u_R = stateLimits(idU,1,k,j,i,iens)/r_R;
-      real v_L = stateLimits(idV,0,k,j,i,iens)/r_L;   real v_R = stateLimits(idV,1,k,j,i,iens)/r_R;
-      real w_L = stateLimits(idW,0,k,j,i,iens)/r_L;   real w_R = stateLimits(idW,1,k,j,i,iens)/r_R;
-      real t_L = stateLimits(idT,0,k,j,i,iens)/r_L;   real t_R = stateLimits(idT,1,k,j,i,iens)/r_R;
-      // Compute average state
-      real r = 0.5_fp * (r_L + r_R);
-      real u = 0.5_fp * (u_L + u_R);
-      real v = 0.5_fp * (v_L + v_R);
-      real w = 0.5_fp * (w_L + w_R);
-      real t = 0.5_fp * (t_L + t_R);
-      real p = C0 * pow(r*t,gamma);
-      real cs2 = gamma*p/r;
-      real cs  = sqrt(cs2);
-
-      // COMPUTE UPWIND STATE FLUXES
-      // Get left and right fluxes
-      real q1_L = stateLimits(idR,0,k,j,i,iens);   real q1_R = stateLimits(idR,1,k,j,i,iens);
-      real q2_L = stateLimits(idU,0,k,j,i,iens);   real q2_R = stateLimits(idU,1,k,j,i,iens);
-      real q3_L = stateLimits(idV,0,k,j,i,iens);   real q3_R = stateLimits(idV,1,k,j,i,iens);
-      real q4_L = stateLimits(idW,0,k,j,i,iens);   real q4_R = stateLimits(idW,1,k,j,i,iens);
-      real q5_L = stateLimits(idT,0,k,j,i,iens);   real q5_R = stateLimits(idT,1,k,j,i,iens);
-      // Compute upwind characteristics
-      // Waves 1-3, velocity: v
-      real w1, w2, w3;
-      if (v > 0) {
-        w1 = q1_L - q5_L/t;
-        w2 = q2_L - u*q5_L/t;
-        w3 = q4_L - w*q5_L/t;
-      } else {
-        w1 = q1_R - q5_R/t;
-        w2 = q2_R - u*q5_R/t;
-        w3 = q4_R - w*q5_R/t;
-      }
-      // Wave 5, velocity: v-cs
-      real w5 =  v*q1_R/(2*cs) - q3_R/(2*cs) + q5_R/(2*t);
-      // Wave 6, velocity: v+cs
-      real w6 = -v*q1_L/(2*cs) + q3_L/(2*cs) + q5_L/(2*t);
-      // Use right eigenmatrix to compute upwind flux
-      real q1 = w1 + w5 + w6;
-      real q2 = w2 + u*w5 + u*w6;
-      real q3 = v*w1 + (v-cs)*w5 + (v+cs)*w6;
-      real q4 = w3 + w*w5 + w*w6;
-      real q5 =      t*w5 + t*w6;
-
-      state_flux(idR,k,j,i,iens) = q3;
-      state_flux(idU,k,j,i,iens) = q3*q2/q1;
-      state_flux(idV,k,j,i,iens) = q3*q3/q1 + C0*pow(q5,gamma);
-      state_flux(idW,k,j,i,iens) = q3*q4/q1;
-      state_flux(idT,k,j,i,iens) = q3*q5/q1;
-
-      // COMPUTE UPWIND TRACER FLUXES
-      // Handle it one tracer at a time
-      for (int tr=0; tr < num_tracers; tr++) {
-        if (v > 0) {
-          tracer_flux(tr,k,j,i,iens) = q3 * tracerLimits(tr,0,k,j,i,iens) / r_L;
-        } else {
-          tracer_flux(tr,k,j,i,iens) = q3 * tracerLimits(tr,1,k,j,i,iens) / r_R;
-        }
-      }
-    });
-
-    stateLimits  = real6d();
-    tracerLimits = real6d();
-
-    //////////////////////////////////////////////////////////
-    // Limit the tracer fluxes for positivity
-    //////////////////////////////////////////////////////////
-    real5d fct_mult("fct_mult",num_tracers,nz,ny+1,nx,nens);
-    parallel_for( "Spatial.h Y FCT" , SimpleBounds<5>(num_tracers,nz,ny+1,nx,nens) ,
-                  YAKL_LAMBDA (int tr, int k, int j, int i, int iens) {
-      fct_mult(tr,k,j,i,iens) = 1.;
-      // Solid wall BCs mean u == 0 at boundaries, so we assume periodic if u != 0
-      if (tracer_pos(tr)) {
-        // Compute and apply the flux reduction factor of the upwind cell
-        if      (tracer_flux(tr,k,j,i,iens) > 0) {
-          // upwind is to the left of this interface
-          int ind_j = j-1;
-          if (ind_j == -1) ind_j = ny-1;
-          real f1 = min( tracer_flux(tr,k,ind_j  ,i,iens) , 0._fp );
-          real f2 = max( tracer_flux(tr,k,ind_j+1,i,iens) , 0._fp );
-          real fluxOut = dt*(f2-f1)/dy;
-          real mass = tracers(tr,hs+k,hs+ind_j,hs+i,iens);
-          if (fluxOut > 0) {
-            fct_mult(tr,k,j,i,iens) = min( 1._fp , mass / fluxOut );
-          }
-        } else if (tracer_flux(tr,k,j,i,iens) < 0) {
-          // upwind is to the right of this interface
-          int ind_j = j;
-          if (ind_j == ny) ind_j = 0;
-          real f1 = min( tracer_flux(tr,k,ind_j  ,i,iens) , 0._fp );
-          real f2 = max( tracer_flux(tr,k,ind_j+1,i,iens) , 0._fp );
-          real fluxOut = dt*(f2-f1)/dy;
-          real mass = tracers(tr,hs+k,hs+ind_j,hs+i,iens);
-          if (fluxOut > 0) {
-            fct_mult(tr,k,j,i,iens) = min( 1._fp , mass / fluxOut );
-          }
-        }
-      }
-    });
-
-    //////////////////////////////////////////////////////////
-    // Compute the tendencies
-    //////////////////////////////////////////////////////////
-    parallel_for( "Spatial.h Y tendendies" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                  YAKL_LAMBDA(int k, int j, int i, int iens) {
-      for (int l=0; l < num_state; l++) {
-        stateTend(l,k,j,i,iens) = - ( state_flux(l,k,j+1,i,iens) - state_flux(l,k,j,i,iens) ) / dy;
-      }
-      for (int l=0; l < num_tracers; l++) {
-        // Compute the tracer tendency
-        tracerTend(l,k,j,i,iens) = - ( tracer_flux(l,k,j+1,i,iens)*fct_mult(l,k,j+1,i,iens) -
-                                       tracer_flux(l,k,j  ,i,iens)*fct_mult(l,k,j  ,i,iens) ) / dy;
-      }
-    });
-  }
-
-
-
-  YAKL_INLINE static int wrapz(int k, int kk, int nz) {
-    int ret = k+kk;
-    if (ret < hs+0   ) ret = hs+0;
-    if (ret > hs+nz-1) ret = hs+nz-1;
-    return ret;
-  }
-
-
-
-  void computeTendenciesZ( real5d const &state   , real5d const &stateTend  ,
-                           real5d const &tracers , real5d const &tracerTend ,
-                           real &dt ) const {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    YAKL_SCOPE( nz                      , this->nz                     );
-    YAKL_SCOPE( weno_scalars            , this->weno_scalars           );
-    YAKL_SCOPE( weno_winds              , this->weno_winds             );
-    YAKL_SCOPE( c2g                     , this->coefs_to_gll           );
-    YAKL_SCOPE( idl                     , this->idl                    );
-    YAKL_SCOPE( sigma                   , this->sigma                  );
-    YAKL_SCOPE( hyDensSten              , this->hyDensSten             );
-    YAKL_SCOPE( hyDensThetaSten         , this->hyDensThetaSten        );
-    YAKL_SCOPE( hyDensGLL               , this->hyDensGLL              );
-    YAKL_SCOPE( hyDensThetaGLL          , this->hyDensThetaGLL         );
-    YAKL_SCOPE( hyPressureGLL           , this->hyPressureGLL          );
-    YAKL_SCOPE( sim2d                   , this->sim2d                  );
-    YAKL_SCOPE( derivMatrix             , this->derivMatrix            );
-    YAKL_SCOPE( dz                      , this->dz                     );
-    YAKL_SCOPE( tracer_pos              , this->tracer_pos             );
-    YAKL_SCOPE( num_tracers             , this->num_tracers            );
-    YAKL_SCOPE( gllWts_ngll             , this->gllWts_ngll            );
-    YAKL_SCOPE( gamma                   , this->gamma                  );
-    YAKL_SCOPE( C0                      , this->C0                     );
-    YAKL_SCOPE( vert_sten_to_gll        , this->vert_sten_to_gll       );
-    YAKL_SCOPE( vert_sten_to_coefs      , this->vert_sten_to_coefs     );
-    YAKL_SCOPE( vert_weno_recon_lower   , this->vert_weno_recon_lower  );
-    YAKL_SCOPE( grav                    , this->grav                   );
-
-    // Pre-process the tracers by dividing by density inside the domain
-    // After this, we can reconstruct tracers only (not rho * tracer)
-    parallel_for( "Spatial.h Z tracer div dens" , SimpleBounds<5>(num_tracers,nz,ny,nx,nens) ,
-                  YAKL_LAMBDA (int tr, int k, int j, int i, int iens) {
-      tracers(tr,hs+k,hs+j,hs+i,iens) /= state(idR,hs+k,hs+j,hs+i,iens);
-    });
-
-    real6d stateLimits ("stateLimits" ,num_state  ,2,nz+1,ny,nx,nens);
-    real6d tracerLimits("tracerLimits",num_tracers,2,nz+1,ny,nx,nens);
-
-    // Loop through all cells, reconstruct in x-direction, compute centered tendencies, store cell-edge state estimates
-    parallel_for( "Spatial.h Z recon" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                  YAKL_LAMBDA (int k, int j, int i, int iens) {
-      SArray<real,2,ord,ngll>       s2g_loc;
-      SArray<real,2,ord,ord>        s2c_loc;
-      SArray<real,3,hs+1,hs+1,hs+1> weno_recon_lower_loc;
-      for (int jj=0; jj < ord; jj++) {
-        for (int ii=0; ii < ngll; ii++) {
-          s2g_loc(jj,ii) = vert_sten_to_gll(k,jj,ii,iens);
-        }
-      }
-      for (int jj=0; jj < ord; jj++) {
-        for (int ii=0; ii < ord; ii++) {
-          s2c_loc(jj,ii) = vert_sten_to_coefs(k,jj,ii,iens);
-        }
-      }
-      for (int kk=0; kk < hs+1; kk++) {
-        for (int jj=0; jj < hs+1; jj++) {
-          for (int ii=0; ii < hs+1; ii++) {
-            weno_recon_lower_loc(kk,jj,ii) = vert_weno_recon_lower(k,kk,jj,ii,iens);
-          }
-        }
-      }
-
-      SArray<real,2,nAder,ngll> r_DTs , rw_DTs;
-
-      { // State
-        SArray<real,2,nAder,ngll> ru_DTs , rv_DTs , rt_DTs, rwu_DTs , rwv_DTs , rww_DTs , rwt_DTs , rt_gamma_DTs;
-        { // Recon
-          SArray<real,1,ord>  stencil;
-          SArray<real,1,ngll> gll;
-
-          // Density
-          for (int kk=0; kk < ord; kk++) {
-            if (k+kk < hs || k+kk > hs+nz-1) {
-              stencil(kk) = state(idR,hs+k,hs+j,hs+i,iens) - hyDensSten(k,hs,iens);
-            } else {
-              stencil(kk) = state(idR,k+kk,hs+j,hs+i,iens) - hyDensSten(k,kk,iens);
-            }
-          }
-          reconstruct_gll_values( stencil , gll , c2g , s2g_loc , s2c_loc , weno_recon_lower_loc ,
-                                  idl , sigma , weno_scalars );
-          // Add hydrostasis back on
-          for (int kk=0; kk < ngll; kk++) { r_DTs(0,kk) = gll(kk) + hyDensGLL(k,kk,iens); }
-
-          // u values and derivatives
-          for (int kk=0; kk < ord; kk++) { stencil(kk) = state(idU,wrapz(k,kk,nz),hs+j,hs+i,iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g_loc , s2c_loc , weno_recon_lower_loc ,
-                                  idl , sigma , weno_winds );
-          for (int kk=0; kk < ngll; kk++) { ru_DTs(0,kk) = gll(kk); }
-
-          // v
-          for (int kk=0; kk < ord; kk++) { stencil(kk) = state(idV,wrapz(k,kk,nz),hs+j,hs+i,iens); }
-          reconstruct_gll_values( stencil , gll , c2g , s2g_loc , s2c_loc , weno_recon_lower_loc ,
-                                  idl , sigma , weno_winds );
-          for (int kk=0; kk < ngll; kk++) { rv_DTs(0,kk) = gll(kk); }
-
-          // w
-          for (int kk=0; kk < ord; kk++) {
-            stencil(kk) = state(idW,wrapz(k,kk,nz),hs+j,hs+i,iens);
-            if (k+kk > hs+nz-1 || k+kk < hs) stencil(kk) = 0;
-          }
-          reconstruct_gll_values( stencil , gll , c2g , s2g_loc , s2c_loc , weno_recon_lower_loc ,
-                                  idl , sigma , weno_winds );
-          if (k == nz-1) gll(ngll-1) = 0;
-          if (k == 0   ) gll(0     ) = 0;
-          for (int kk=0; kk < ngll; kk++) { rw_DTs(0,kk) = gll(kk); }
-
-          // rho*theta
-          for (int kk=0; kk < ord; kk++) {
-            if (k+kk < hs || k+kk > hs+nz-1) {
-              stencil(kk) = state(idT,hs+k,hs+j,hs+i,iens) - hyDensThetaSten(k,hs,iens);
-            } else {
-              stencil(kk) = state(idT,k+kk,hs+j,hs+i,iens) - hyDensThetaSten(k,kk,iens);
-            }
-          }
-          reconstruct_gll_values( stencil , gll , c2g , s2g_loc , s2c_loc , weno_recon_lower_loc ,
-                                  idl , sigma , weno_scalars );
-          // Add hydrostasis back on
-          for (int kk=0; kk < ngll; kk++) { rt_DTs(0,kk) = gll(kk) + hyDensThetaGLL(k,kk,iens); }
-        }
-
-        for (int kk=0; kk < ngll; kk++) {
-          real r = r_DTs (0,kk);
-          real u = ru_DTs(0,kk) / r;
-          real v = rv_DTs(0,kk) / r;
-          real w = rw_DTs(0,kk) / r;
-          real t = rt_DTs(0,kk) / r;
-          rwu_DTs     (0,kk) = r*w*u;
-          rwv_DTs     (0,kk) = r*w*v;
-          rww_DTs     (0,kk) = r*w*w;
-          rwt_DTs     (0,kk) = r*w*t;
-          rt_gamma_DTs(0,kk) = pow(r*t,gamma);
-        }
-
-        if (nAder > 1) {
-          diffTransformEulerConsZ( r_DTs , ru_DTs , rv_DTs , rw_DTs , rt_DTs , rwu_DTs , rwv_DTs , rww_DTs ,
-                                   rwt_DTs , rt_gamma_DTs , derivMatrix , hyDensGLL , hyPressureGLL , C0 , gamma ,
-                                   grav , k , dz(k,iens) , nz , iens );
-        }
-
-        SArray<real,1,ngll> r_tavg, rw_tavg;
-        if (timeAvg) {
-          compute_timeAvg( r_DTs  , r_tavg  , dt );
-          compute_timeAvg( ru_DTs           , dt );
-          compute_timeAvg( rv_DTs           , dt );
-          compute_timeAvg( rw_DTs , rw_tavg , dt );
-          compute_timeAvg( rt_DTs           , dt );
-        } else {
-          for (int ii=0; ii < ngll; ii++) {
-            r_tavg (ii) = r_DTs (0,ii);
-            rw_tavg(ii) = rw_DTs(0,ii);
-          }
-        }
-
-        // Left interface
-        stateLimits(idR,1,k  ,j,i,iens) = r_tavg  (0     );
-        stateLimits(idU,1,k  ,j,i,iens) = ru_DTs(0,0     );
-        stateLimits(idV,1,k  ,j,i,iens) = rv_DTs(0,0     );
-        stateLimits(idW,1,k  ,j,i,iens) = rw_tavg (0     );
-        stateLimits(idT,1,k  ,j,i,iens) = rt_DTs(0,0     );
-        // Right interface
-        stateLimits(idR,0,k+1,j,i,iens) = r_tavg  (ngll-1);
-        stateLimits(idU,0,k+1,j,i,iens) = ru_DTs(0,ngll-1);
-        stateLimits(idV,0,k+1,j,i,iens) = rv_DTs(0,ngll-1);
-        stateLimits(idW,0,k+1,j,i,iens) = rw_tavg (ngll-1);
-        stateLimits(idT,0,k+1,j,i,iens) = rt_DTs(0,ngll-1);
-
-        real ravg = 0;
-        for (int kk=0; kk < ngll; kk++) {
-          ravg += r_tavg(kk) * gllWts_ngll(kk);
-        }
-        stateTend(idR,k,j,i,iens) = 0;
-        stateTend(idU,k,j,i,iens) = 0;
-        stateTend(idV,k,j,i,iens) = 0;
-        stateTend(idW,k,j,i,iens) = -grav * ravg;
-        stateTend(idT,k,j,i,iens) = 0;
-      }
-
-      { // Tracers
-        for (int tr=0; tr < num_tracers; tr++) {
-          SArray<real,2,nAder,ngll> rt_DTs;  // Density * tracer
-          SArray<real,2,nAder,ngll> rwt_DTs; // Density * wwind * tracer
-          { // Recon
-            SArray<real,1,ord>  stencil;
-            SArray<real,1,ngll> gll;
-
-            for (int kk=0; kk < ord; kk++) { stencil(kk) = tracers(tr,wrapz(k,kk,nz),hs+j,hs+i,iens); }
-            reconstruct_gll_values( stencil , gll , c2g , s2g_loc , s2c_loc , weno_recon_lower_loc ,
-                                    idl , sigma , weno_scalars );
-            for (int kk=0; kk < ngll; kk++) { gll(kk) *= r_DTs(0,kk); }
-            if (tracer_pos(tr)) {
-              for (int kk=0; kk < ngll; kk++) { gll(kk) = max( 0._fp , gll(kk) ); }
-            }
-            for (int kk=0; kk < ngll; kk++) { rt_DTs(0,kk) = gll(kk); }
-          }
-
-          for (int kk=0; kk < ngll; kk++) {
-            rwt_DTs(0,kk) = rt_DTs(0,kk) * rw_DTs(0,kk) / r_DTs(0,kk);
-          }
-
-          if (nAder > 1) {
-            diffTransformTracer( r_DTs , rw_DTs , rt_DTs , rwt_DTs , derivMatrix , dz(k,iens) );
-          }
-
-          if (timeAvg) {
-            compute_timeAvg( rt_DTs  , dt );
-          }
-
-          if (tracer_pos(tr)) {
-            for (int kk=0; kk < ngll; kk++) { rt_DTs(0,kk) = max( 0._fp , rt_DTs(0,kk) ); }
-          }
-
-          if (k == nz-1) rwt_DTs(0,ngll-1) = 0;
-          if (k == 0   ) rwt_DTs(0,0     ) = 0;
-
-          tracerLimits(tr,1,k  ,j,i,iens) = rt_DTs (0,0     ); // Left interface
-          tracerLimits(tr,0,k+1,j,i,iens) = rt_DTs (0,ngll-1); // Right interface
-        }
-      }
-    });
-
-    real5d state_flux ("state_flux" ,num_state  ,nz+1,ny,nx,nens);
-    real5d tracer_flux("tracer_flux",num_tracers,nz+1,ny,nx,nens);
-
-    //////////////////////////////////////////////////////////
-    // Compute the upwind fluxes
-    //////////////////////////////////////////////////////////
-    parallel_for( "Spatial.h Z Riemann" , SimpleBounds<4>(nz+1,ny,nx,nens) ,
-                  YAKL_LAMBDA (int k, int j, int i, int iens) {
-      if (k == 0) {
-        for (int l = 0; l < num_state  ; l++) { stateLimits (l,0,0 ,j,i,iens) = stateLimits (l,1,0 ,j,i,iens); }
-        for (int l = 0; l < num_tracers; l++) { tracerLimits(l,0,0 ,j,i,iens) = tracerLimits(l,1,0 ,j,i,iens); }
-      }
-      if (k == nz) {
-        for (int l = 0; l < num_state  ; l++) { stateLimits (l,1,nz,j,i,iens) = stateLimits (l,0,nz,j,i,iens); }
-        for (int l = 0; l < num_tracers; l++) { tracerLimits(l,1,nz,j,i,iens) = tracerLimits(l,0,nz,j,i,iens); }
-      }
-      // Get left and right state
-      real r_L = stateLimits(idR,0,k,j,i,iens)    ;   real r_R = stateLimits(idR,1,k,j,i,iens)    ;
-      real u_L = stateLimits(idU,0,k,j,i,iens)/r_L;   real u_R = stateLimits(idU,1,k,j,i,iens)/r_R;
-      real v_L = stateLimits(idV,0,k,j,i,iens)/r_L;   real v_R = stateLimits(idV,1,k,j,i,iens)/r_R;
-      real w_L = stateLimits(idW,0,k,j,i,iens)/r_L;   real w_R = stateLimits(idW,1,k,j,i,iens)/r_R;
-      real t_L = stateLimits(idT,0,k,j,i,iens)/r_L;   real t_R = stateLimits(idT,1,k,j,i,iens)/r_R;
-      // Compute average state
-      real r = 0.5_fp * (r_L + r_R);
-      real u = 0.5_fp * (u_L + u_R);
-      real v = 0.5_fp * (v_L + v_R);
-      real w = 0.5_fp * (w_L + w_R);
-      real t = 0.5_fp * (t_L + t_R);
-      real p = C0 * pow(r*t,gamma);
-      real cs2 = gamma*p/r;
-      real cs  = sqrt(cs2);
-      // Get left and right fluxes
-      real q1_L = stateLimits(idR,0,k,j,i,iens);   real q1_R = stateLimits(idR,1,k,j,i,iens);
-      real q2_L = stateLimits(idU,0,k,j,i,iens);   real q2_R = stateLimits(idU,1,k,j,i,iens);
-      real q3_L = stateLimits(idV,0,k,j,i,iens);   real q3_R = stateLimits(idV,1,k,j,i,iens);
-      real q4_L = stateLimits(idW,0,k,j,i,iens);   real q4_R = stateLimits(idW,1,k,j,i,iens);
-      real q5_L = stateLimits(idT,0,k,j,i,iens);   real q5_R = stateLimits(idT,1,k,j,i,iens);
-      // Compute upwind characteristics
-      // Waves 1-3, velocity: w
-      real w1, w2, w3;
-      if (w > 0) {
-        w1 = q1_L - q5_L/t;
-        w2 = q2_L - u*q5_L/t;
-        w3 = q3_L - v*q5_L/t;
-      } else {
-        w1 = q1_R - q5_R/t;
-        w2 = q2_R - u*q5_R/t;
-        w3 = q3_R - v*q5_R/t;
-      }
-      // Wave 5, velocity: w-cs
-      real w5 =  w*q1_R/(2*cs) - q4_R/(2*cs) + q5_R/(2*t);
-      // Wave 6, velocity: w+cs
-      real w6 = -w*q1_L/(2*cs) + q4_L/(2*cs) + q5_L/(2*t);
-      // Use right eigenmatrix to compute upwind flux
-      real q1 = w1 + w5 + w6;
-      real q2 = w2 + u*w5 + u*w6;
-      real q3 = w3 + v*w5 + v*w6;
-      real q4 = w*w1 + (w-cs)*w5 + (w+cs)*w6;
-      real q5 =      t*w5 + t*w6;
-
-      state_flux(idR,k,j,i,iens) = q4;
-      state_flux(idU,k,j,i,iens) = q4*q2/q1;
-      state_flux(idV,k,j,i,iens) = q4*q3/q1;
-      state_flux(idW,k,j,i,iens) = q4*q4/q1 + C0*pow(q5,gamma);
-      state_flux(idT,k,j,i,iens) = q4*q5/q1;
-
-      // COMPUTE UPWIND TRACER FLUXES
-      // Handle it one tracer at a time
-      for (int tr=0; tr < num_tracers; tr++) {
-        if (w > 0) {
-          tracer_flux(tr,k,j,i,iens) = q4 * tracerLimits(tr,0,k,j,i,iens) / r_L;
-        } else {
-          tracer_flux(tr,k,j,i,iens) = q4 * tracerLimits(tr,1,k,j,i,iens) / r_R;
-        }
-      }
-    });
-
-    stateLimits  = real6d();
-    tracerLimits = real6d();
-
-    //////////////////////////////////////////////////////////
-    // Limit the tracer fluxes for positivity
-    //////////////////////////////////////////////////////////
-    real5d fct_mult("fct_mult",num_tracers,nz+1,ny,nx,nens);
-    parallel_for( "Spatial.h Z FCT" , SimpleBounds<5>(num_tracers,nz+1,ny,nx,nens) ,
-                  YAKL_LAMBDA (int tr, int k, int j, int i, int iens) {
-      fct_mult(tr,k,j,i,iens) = 1.;
-      if (k == 0 || k == nz) tracer_flux(tr,k,j,i,iens) = 0;
-      // Solid wall BCs mean w == 0 at boundaries
-      if (tracer_pos(tr)) {
-        // Compute and apply the flux reduction factor of the upwind cell
-        if      (tracer_flux(tr,k,j,i,iens) > 0) {
-          int ind_k = k-1;
-          // upwind is to the left of this interface
-          real f1 = min( tracer_flux(tr,ind_k  ,j,i,iens) , 0._fp );
-          real f2 = max( tracer_flux(tr,ind_k+1,j,i,iens) , 0._fp );
-          real fluxOut = dt*(f2-f1)/dz(ind_k,iens);
-          real dens = state(idR,hs+ind_k,hs+j,hs+i,iens);
-          real mass = tracers(tr,hs+ind_k,hs+j,hs+i,iens) * dens;
-          if (fluxOut > 0) {
-            fct_mult(tr,k,j,i,iens) = min( 1._fp , mass / fluxOut );
-          }
-        } else if (tracer_flux(tr,k,j,i,iens) < 0) {
-          int ind_k = k;
-          // upwind is to the right of this interface
-          real f1 = min( tracer_flux(tr,ind_k  ,j,i,iens) , 0._fp );
-          real f2 = max( tracer_flux(tr,ind_k+1,j,i,iens) , 0._fp );
-          real fluxOut = dt*(f2-f1)/dz(ind_k,iens);
-          real dens = state(idR,hs+ind_k,hs+j,hs+i,iens);
-          real mass = tracers(tr,hs+ind_k,hs+j,hs+i,iens) * dens;
-          if (fluxOut > 0) {
-            fct_mult(tr,k,j,i,iens) = min( 1._fp , mass / fluxOut );
-          }
-        }
-      }
-    });
-
-    //////////////////////////////////////////////////////////
-    // Compute the tendencies
-    //////////////////////////////////////////////////////////
-    parallel_for( "Spatial.h Z tendencies" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                  YAKL_LAMBDA(int k, int j, int i, int iens) {
-      for (int l=0; l < num_state; l++) {
-        if (sim2d && l == idV) {
-          stateTend(l,k,j,i,iens) = 0;
-        } else {
-          stateTend(l,k,j,i,iens) += - ( state_flux(l,k+1,j,i,iens) - state_flux(l,k,j,i,iens) ) / dz(k,iens);
-        }
-      }
-      for (int l=0; l < num_tracers; l++) {
-        // Compute tracer tendency
-        tracerTend(l,k,j,i,iens) = - ( tracer_flux(l,k+1,j,i,iens)*fct_mult(l,k+1,j,i,iens) -
-                                       tracer_flux(l,k  ,j,i,iens)*fct_mult(l,k  ,j,i,iens) ) / dz(k,iens);
-        // Multiply density back onto the tracers
-        tracers(l,hs+k,hs+j,hs+i,iens) *= state(idR,hs+k,hs+j,hs+i,iens);
-      }
-    });
-  }
-
-
-
-  const char * getName() { return ""; }
-
-
-
-  void finalize(real4d const &state , real4d const &tracers) {}
-
-
-
-  // ord stencil values to ngll GLL values; store in DTs
-  YAKL_INLINE static void reconstruct_gll_values( SArray<real,1,ord> const stencil                      ,
-                                                  SArray<real,1,ngll> &gll                              ,
-                                                  SArray<real,2,ord,ngll> const &coefs_to_gll           ,
-                                                  SArray<real,2,ord,ngll> const &sten_to_gll            ,
-                                                  SArray<real,2,ord,ord>  const &sten_to_coefs          ,
-                                                  SArray<real,3,hs+1,hs+1,hs+1> const &weno_recon_lower ,
-                                                  SArray<real,1,hs+2> const &idl                        ,
-                                                  real sigma, bool doweno ) {
-    if (doweno) {
-
-      // Reconstruct values
-      SArray<real,1,ord> wenoCoefs;
-      weno::compute_weno_coefs<ord>( weno_recon_lower , sten_to_coefs , stencil , wenoCoefs , idl , sigma );
-      // Transform ord weno coefficients into ngll GLL points
-      for (int ii=0; ii<ngll; ii++) {
-        real tmp = 0;
-        for (int s=0; s < ord; s++) {
-          tmp += coefs_to_gll(s,ii) * wenoCoefs(s);
-        }
-        gll(ii) = tmp;
-      }
-
-    } else {
-
-      // Transform ord stencil cell averages into ngll GLL points
-      for (int ii=0; ii<ngll; ii++) {
-        real tmp = 0;
-        for (int s=0; s < ord; s++) {
-          tmp += sten_to_gll(s,ii) * stencil(s);
-        }
-        gll(ii) = tmp;
-      }
-
-    } // if doweno
-  }
-
-
-
-  YAKL_INLINE static void diffTransformEulerConsX( SArray<real,2,nAder,ngll> &r  ,
-                                                   SArray<real,2,nAder,ngll> &ru ,
-                                                   SArray<real,2,nAder,ngll> &rv ,
-                                                   SArray<real,2,nAder,ngll> &rw ,
-                                                   SArray<real,2,nAder,ngll> &rt ,
-                                                   SArray<real,2,nAder,ngll> &ruu ,
-                                                   SArray<real,2,nAder,ngll> &ruv ,
-                                                   SArray<real,2,nAder,ngll> &ruw ,
-                                                   SArray<real,2,nAder,ngll> &rut ,
-                                                   SArray<real,2,nAder,ngll> &rt_gamma ,
-                                                   SArray<real,2,ngll,ngll> const &deriv ,
-                                                   real C0, real gamma, real dx ) {
-    // zero out the non-linear DTs
-    for (int kt=1; kt < nAder; kt++) {
-      for (int ii=0; ii < ngll; ii++) {
-        ruu     (kt,ii) = 0;
-        ruv     (kt,ii) = 0;
-        ruw     (kt,ii) = 0;
-        rut     (kt,ii) = 0;
-        rt_gamma(kt,ii) = 0;
-      }
-    }
-
-    // Loop over the time derivatives
-    for (int kt=0; kt<nAder-1; kt++) {
-      // Compute the state at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        real df1_dx = 0;
-        real df2_dx = 0;
-        real df3_dx = 0;
-        real df4_dx = 0;
-        real df5_dx = 0;
-        for (int s=0; s<ngll; s++) {
-          df1_dx += deriv(s,ii) * ( ru (kt,s) );
-          if (kt == 0) { df2_dx += deriv(s,ii) * ( ruu(kt,s) + C0*rt_gamma(kt,s)   ); }
-          else         { df2_dx += deriv(s,ii) * ( ruu(kt,s) + C0*rt_gamma(kt,s)/2 ); }
-          df3_dx += deriv(s,ii) * ( ruv(kt,s) );
-          df4_dx += deriv(s,ii) * ( ruw(kt,s) );
-          df5_dx += deriv(s,ii) * ( rut(kt,s) );
-        }
-        r (kt+1,ii) = -df1_dx/dx/(kt+1._fp);
-        ru(kt+1,ii) = -df2_dx/dx/(kt+1._fp);
-        rv(kt+1,ii) = -df3_dx/dx/(kt+1._fp);
-        rw(kt+1,ii) = -df4_dx/dx/(kt+1._fp);
-        rt(kt+1,ii) = -df5_dx/dx/(kt+1._fp);
-      }
-
-      // Compute ru* at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        real tot_ruu = 0;
-        real tot_ruv = 0;
-        real tot_ruw = 0;
-        real tot_rut = 0;
-        for (int ir=0; ir<=kt+1; ir++) {
-          tot_ruu += ru(ir,ii) * ru(kt+1-ir,ii) - r(ir,ii) * ruu(kt+1-ir,ii);
-          tot_ruv += ru(ir,ii) * rv(kt+1-ir,ii) - r(ir,ii) * ruv(kt+1-ir,ii);
-          tot_ruw += ru(ir,ii) * rw(kt+1-ir,ii) - r(ir,ii) * ruw(kt+1-ir,ii);
-          tot_rut += ru(ir,ii) * rt(kt+1-ir,ii) - r(ir,ii) * rut(kt+1-ir,ii);
-        }
-        ruu(kt+1,ii) = tot_ruu / r(0,ii);
-        ruv(kt+1,ii) = tot_ruv / r(0,ii);
-        ruw(kt+1,ii) = tot_ruw / r(0,ii);
-        rut(kt+1,ii) = tot_rut / r(0,ii);
-
-        // Compute rt_gamma at the next time level
-        real tot_rt_gamma = 0;
-        for (int ir=0; ir<=kt; ir++) {
-          tot_rt_gamma += (kt+1._fp -ir) * ( gamma*rt_gamma(ir,ii)*rt(kt+1-ir,ii) - rt(ir,ii)*rt_gamma(kt+1-ir,ii) );
-        }
-        rt_gamma(kt+1,ii) = ( gamma*rt_gamma(0,ii)*rt(kt+1,ii) + tot_rt_gamma / (kt+1._fp) ) / rt(0,ii);
-      }
-    }
-
-    // Fix the rt_gamma
-    for (int kt=1; kt < nAder; kt++) {
-      for (int ii=0; ii < ngll; ii++) {
-        rt_gamma(kt,ii) /= 2;
-      }
-    }
-  }
-
-
-
-  YAKL_INLINE static void diffTransformEulerConsY( SArray<real,2,nAder,ngll> &r  ,
-                                                   SArray<real,2,nAder,ngll> &ru ,
-                                                   SArray<real,2,nAder,ngll> &rv ,
-                                                   SArray<real,2,nAder,ngll> &rw ,
-                                                   SArray<real,2,nAder,ngll> &rt ,
-                                                   SArray<real,2,nAder,ngll> &rvu ,
-                                                   SArray<real,2,nAder,ngll> &rvv ,
-                                                   SArray<real,2,nAder,ngll> &rvw ,
-                                                   SArray<real,2,nAder,ngll> &rvt ,
-                                                   SArray<real,2,nAder,ngll> &rt_gamma ,
-                                                   SArray<real,2,ngll,ngll> const &deriv ,
-                                                   real C0, real gamma, real dy ) {
-    // zero out the non-linear DTs
-    for (int kt=1; kt < nAder; kt++) {
-      for (int ii=0; ii < ngll; ii++) {
-        rvu     (kt,ii) = 0;
-        rvv     (kt,ii) = 0;
-        rvw     (kt,ii) = 0;
-        rvt     (kt,ii) = 0;
-        rt_gamma(kt,ii) = 0;
-      }
-    }
-
-    // Loop over the time derivatives
-    for (int kt=0; kt<nAder-1; kt++) {
-      // Compute the state at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        real drv_dy    = 0;
-        real drvu_dy   = 0;
-        real drvv_p_dy = 0;
-        real drvw_dy   = 0;
-        real drvt_dy   = 0;
-        for (int s=0; s<ngll; s++) {
-          drv_dy    += deriv(s,ii) * rv(kt,s);
-          drvu_dy   += deriv(s,ii) * rvu(kt,s);
-          if (kt == 0) { drvv_p_dy += deriv(s,ii) * ( rvv(kt,s) + C0*rt_gamma(kt,s)   ); }
-          else         { drvv_p_dy += deriv(s,ii) * ( rvv(kt,s) + C0*rt_gamma(kt,s)/2 ); }
-          drvw_dy   += deriv(s,ii) * rvw(kt,s);
-          drvt_dy   += deriv(s,ii) * rvt(kt,s);
-        }
-        r (kt+1,ii) = -drv_dy   /dy/(kt+1);
-        ru(kt+1,ii) = -drvu_dy  /dy/(kt+1);
-        rv(kt+1,ii) = -drvv_p_dy/dy/(kt+1);
-        rw(kt+1,ii) = -drvw_dy  /dy/(kt+1);
-        rt(kt+1,ii) = -drvt_dy  /dy/(kt+1);
-      }
-
-      // Compute ru* at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        // Compute the non-linear differential transforms
-        real tot_rvu = 0;
-        real tot_rvv = 0;
-        real tot_rvw = 0;
-        real tot_rvt = 0;
-        for (int l=0; l<=kt+1; l++) {
-          tot_rvu += rv(l,ii) * ru(kt+1-l,ii) - r(l,ii) * rvu(kt+1-l,ii);
-          tot_rvv += rv(l,ii) * rv(kt+1-l,ii) - r(l,ii) * rvv(kt+1-l,ii);
-          tot_rvw += rv(l,ii) * rw(kt+1-l,ii) - r(l,ii) * rvw(kt+1-l,ii);
-          tot_rvt += rv(l,ii) * rt(kt+1-l,ii) - r(l,ii) * rvt(kt+1-l,ii);
-        }
-        rvu(kt+1,ii) = tot_rvu / r(0,ii);
-        rvv(kt+1,ii) = tot_rvv / r(0,ii);
-        rvw(kt+1,ii) = tot_rvw / r(0,ii);
-        rvt(kt+1,ii) = tot_rvt / r(0,ii);
-
-        // Compute rt_gamma at the next time level
-        real tot_rt_gamma = 0;
-        for (int l=0; l<=kt; l++) {
-          tot_rt_gamma += (kt+1._fp -l) * ( gamma*rt_gamma(l,ii)*rt(kt+1-l,ii) - rt(l,ii)*rt_gamma(kt+1-l,ii) );
-        }
-        rt_gamma(kt+1,ii) = ( gamma*rt_gamma(0,ii)*rt(kt+1,ii) + tot_rt_gamma / (kt+1._fp) ) / rt(0,ii);
-      }
-    }
-
-    // Fix the rt_gamma
-    for (int kt=1; kt < nAder; kt++) {
-      for (int ii=0; ii < ngll; ii++) {
-        rt_gamma(kt,ii) /= 2;
-      }
-    }
-  }
-
-
-
-  YAKL_INLINE static void diffTransformEulerConsZ( SArray<real,2,nAder,ngll> &r  ,
-                                                   SArray<real,2,nAder,ngll> &ru ,
-                                                   SArray<real,2,nAder,ngll> &rv ,
-                                                   SArray<real,2,nAder,ngll> &rw ,
-                                                   SArray<real,2,nAder,ngll> &rt ,
-                                                   SArray<real,2,nAder,ngll> &rwu ,
-                                                   SArray<real,2,nAder,ngll> &rwv ,
-                                                   SArray<real,2,nAder,ngll> &rww ,
-                                                   SArray<real,2,nAder,ngll> &rwt ,
-                                                   SArray<real,2,nAder,ngll> &rt_gamma ,
-                                                   SArray<real,2,ngll,ngll> const &deriv ,
-                                                   real3d const &hyDensGLL ,
-                                                   real3d const &hyPressureGLL ,
-                                                   real C0, real gamma , real grav ,
-                                                   int k , real dz , int nz , int iens ) {
-    // zero out the non-linear DTs
-    for (int kt=1; kt < nAder; kt++) {
-      for (int ii=0; ii < ngll; ii++) {
-        rwu     (kt,ii) = 0;
-        rwv     (kt,ii) = 0;
-        rww     (kt,ii) = 0;
-        rwt     (kt,ii) = 0;
-        rt_gamma(kt,ii) = 0;
-      }
-    }
-
-    // Loop over the time derivatives
-    for (int kt=0; kt<nAder-1; kt++) {
-      // Compute the state at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        real drw_dz    = 0;
-        real drwu_dz   = 0;
-        real drwv_dz   = 0;
-        real drww_p_dz = 0;
-        real drwt_dz   = 0;
-        for (int s=0; s<ngll; s++) {
-          drw_dz    += deriv(s,ii) * rw(kt,s);
-          drwu_dz   += deriv(s,ii) * rwu(kt,s);
-          drwv_dz   += deriv(s,ii) * rwv(kt,s);
-          if (kt == 0) { drww_p_dz += deriv(s,ii) * ( rww(kt,s) + C0*rt_gamma(kt,s) - hyPressureGLL(k,s,iens) ); }
-          else         { drww_p_dz += deriv(s,ii) * ( rww(kt,s) + C0*rt_gamma(kt,s)/2                         ); }
-          drwt_dz   += deriv(s,ii) * rwt(kt,s);
-        }
-        r (kt+1,ii) = -drw_dz   /dz/(kt+1);
-        ru(kt+1,ii) = -drwu_dz  /dz/(kt+1);
-        rv(kt+1,ii) = -drwv_dz  /dz/(kt+1);
-        if (kt == 0) { rw(kt+1,ii) = -drww_p_dz/dz/(kt+1) - (r(kt,ii)-hyDensGLL(k,ii,iens))*grav/(kt+1); }
-        else         { rw(kt+1,ii) = -drww_p_dz/dz/(kt+1) -  r(kt,ii)                      *grav/(kt+1); }
-        rt(kt+1,ii) = -drwt_dz  /dz/(kt+1);
-
-        if (k == nz-1) rw(kt+1,ngll-1) = 0;
-        if (k == 0   ) rw(kt+1,0     ) = 0;
-      }
-
-      // Compute ru* at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        // Compute the non-linear differential transforms
-        real tot_rwu = 0;
-        real tot_rwv = 0;
-        real tot_rww = 0;
-        real tot_rwt = 0;
-        for (int l=0; l<=kt+1; l++) {
-          tot_rwu += rw(l,ii) * ru(kt+1-l,ii) - r(l,ii) * rwu(kt+1-l,ii);
-          tot_rwv += rw(l,ii) * rv(kt+1-l,ii) - r(l,ii) * rwv(kt+1-l,ii);
-          tot_rww += rw(l,ii) * rw(kt+1-l,ii) - r(l,ii) * rww(kt+1-l,ii);
-          tot_rwt += rw(l,ii) * rt(kt+1-l,ii) - r(l,ii) * rwt(kt+1-l,ii);
-        }
-        rwu(kt+1,ii) = tot_rwu / r(0,ii);
-        rwv(kt+1,ii) = tot_rwv / r(0,ii);
-        rww(kt+1,ii) = tot_rww / r(0,ii);
-        rwt(kt+1,ii) = tot_rwt / r(0,ii);
-
-        // Compute rt_gamma at the next time level
-        real tot_rt_gamma = 0;
-        for (int l=0; l<=kt; l++) {
-          tot_rt_gamma += (kt+1-l) * ( gamma*rt_gamma(l,ii)*rt(kt+1-l,ii) - rt(l,ii)*rt_gamma(kt+1-l,ii) );
-        }
-        rt_gamma(kt+1,ii) = ( gamma*rt_gamma(0,ii)*rt(kt+1,ii) + tot_rt_gamma / (kt+1) ) / rt(0,ii);
-      }
-    }
-
-    // Fix the rt_gamma
-    for (int kt=1; kt < nAder; kt++) {
-      for (int ii=0; ii < ngll; ii++) {
-        rt_gamma(kt,ii) /= 2;
-      }
-    }
-  }
-
-
-
-  YAKL_INLINE static void diffTransformTracer( SArray<real,2,nAder,ngll> const &r  ,
-                                               SArray<real,2,nAder,ngll> const &ru ,
-                                               SArray<real,2,nAder,ngll> &rt ,
-                                               SArray<real,2,nAder,ngll> &rut ,
-                                               SArray<real,2,ngll,ngll> const &deriv ,
-                                               real dx ) {
-    // zero out the non-linear DT
-    for (int kt=1; kt < nAder; kt++) {
-      for (int ii=0; ii < ngll; ii++) {
-        rut(kt,ii) = 0;
-      }
-    }
-    // Loop over the time derivatives
-    for (int kt=0; kt<nAder-1; kt++) {
-      // Compute the rho*tracer at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        real df_dx = 0;
-        for (int s=0; s<ngll; s++) {
-          df_dx += deriv(s,ii) * rut(kt,s);
-        }
-        rt(kt+1,ii) = -df_dx/dx/(kt+1._fp);
-      }
-      // Compute rut at the next time level
-      for (int ii=0; ii<ngll; ii++) {
-        real tot_rut = 0;
-        for (int ir=0; ir<=kt+1; ir++) {
-          tot_rut += ru(ir,ii) * rt(kt+1-ir,ii) - r(ir,ii) * rut(kt+1-ir,ii);
-        }
-        rut(kt+1,ii) = tot_rut / r(0,ii);
-      }
-    }
-  }
-
-
-
-  YAKL_INLINE static void compute_timeAvg( SArray<real,3,num_state,nAder,ngll> &dts , real dt ) {
-    real dtmult = dt;
-    for (int kt=1; kt<nAder; kt++) {
-      for (int l=0; l<num_state; l++) {
-        for (int ii=0; ii<ngll; ii++) {
-          dts(l,0,ii) += dts(l,kt,ii) * dtmult / (kt+1._fp);
-        }
-      }
-      dtmult *= dt;
-    }
-  }
-
-
-
-  YAKL_INLINE static void compute_timeAvg( SArray<real,2,nAder,ngll> &dts , real dt ) {
-    real dtmult = dt;
-    for (int kt=1; kt<nAder; kt++) {
-      for (int ii=0; ii<ngll; ii++) {
-        dts(0,ii) += dts(kt,ii) * dtmult / (kt+1._fp);
-      }
-      dtmult *= dt;
-    }
-  }
-
-
-
-  YAKL_INLINE static void compute_timeAvg( SArray<real,2,nAder,ngll> const &dts , SArray<real,1,ngll> &tavg ,
-                                           real dt ) {
-    for (int ii=0; ii<ngll; ii++) {
-      tavg(ii) = dts(0,ii);
-    }
-    real dtmult = dt;
-    for (int kt=1; kt<nAder; kt++) {
-      for (int ii=0; ii<ngll; ii++) {
-        tavg(ii) += dts(kt,ii) * dtmult / (kt+1._fp);
-      }
-      dtmult *= dt;
-    }
-  }
-
-
-};
diff --git a/dynamics/awfl/Temporal_ader.h b/dynamics/awfl/Temporal_ader.h
deleted file mode 100644
index cb2a2071..00000000
--- a/dynamics/awfl/Temporal_ader.h
+++ /dev/null
@@ -1,193 +0,0 @@
-
-#pragma once
-
-#include "awfl_const.h"
-#include "pam_coupler.h"
-
-int  constexpr nTimeDerivs = 1;
-bool constexpr timeAvg     = true;
-int  constexpr nAder       = ngll;
-
-template <class Spatial> class Temporal_operator {
-public:
-  static_assert(nTimeDerivs <= ngll , "ERROR: nTimeDerivs must be <= ngll.");
-
-  real5d stateTend;
-  real5d tracerTend;
-
-  Spatial space_op;
-
-  real etime;
-
-  void init(pam::PamCoupler &coupler) {
-
-    space_op.init(coupler);
-
-    stateTend  = space_op.createStateTendArr ();
-    tracerTend = space_op.createTracerTendArr();
-    etime   = 0;
-  }
-
-
-  int add_tracer(std::string name , std::string desc , bool pos_def , bool adds_mass) {
-    return space_op.add_tracer(name , desc , pos_def , adds_mass);
-  }
-
-
-  void init_idealized_state_and_tracers( pam::PamCoupler &coupler ) {
-    space_op.init_idealized_state_and_tracers( coupler );
-  }
-
-
-  real compute_time_step(pam::PamCoupler const &coupler, real cfl_in = -1) {
-    return space_op.compute_time_step(coupler, cfl_in);
-  }
-
-
-  std::vector<real> compute_mass( pam::PamCoupler const &coupler , realConst5d state , realConst5d tracers ) const {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    int nz   = coupler.get_nz();
-    int ny   = coupler.get_ny();
-    int nx   = coupler.get_nx();
-    int nens = coupler.get_nens();
-
-    int idR = Spatial::idR;
-    int hs  = Spatial::hs;
-    int num_tracers = space_op.num_tracers;
-    YAKL_SCOPE( dz          , space_op.dz          );
-
-    std::vector<real> mass(num_tracers+1);
-    real4d tmp("tmp",nz,ny,nx,nens);
-
-    parallel_for( "Temporal_ader.h state mass" , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-      tmp(k,j,i,iens) = state(idR,hs+k,hs+j,hs+i,iens) * dz(k,iens);
-    });
-    mass[0] = yakl::intrinsics::sum(tmp);
-
-    for (int l=0; l < num_tracers; l++) {
-      parallel_for( "Temporal_ader.h tracer mass" , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-        tmp(k,j,i,iens) = tracers(l,hs+k,hs+j,hs+i,iens) * dz(k,iens);
-      });
-      mass[l+1] = yakl::intrinsics::sum(tmp);
-    }
-    return mass;
-  }
-
-
-  void timeStep( pam::PamCoupler &coupler ) {
-    using yakl::c::parallel_for;
-    using yakl::c::SimpleBounds;
-
-    real dtphys = coupler.get_option<real>("crm_dt");
-
-    YAKL_SCOPE( stateTend       , this->stateTend           );
-    YAKL_SCOPE( tracerTend      , this->tracerTend          );
-
-    real dt = compute_time_step( coupler );
-
-    real5d state   = space_op.createStateArr();
-    real5d tracers = space_op.createTracerArr();
-
-    #ifdef PAM_DEBUG
-      memset(state   , 0._fp);
-      memset(tracers , 0._fp);
-    #endif
-
-    space_op.convert_coupler_state_to_dynamics( coupler , state , tracers );
-
-    int idR         = space_op.idR;
-    int idU         = space_op.idU;
-    int idV         = space_op.idV;
-    int idW         = space_op.idW;
-    int idT         = space_op.idT;
-    int nx          = space_op.nx;
-    int ny          = space_op.ny;
-    int nz          = space_op.nz;
-    int nens        = space_op.nens;
-    int num_state   = space_op.num_state;
-    int num_tracers = space_op.num_tracers;
-    int hs          = space_op.hs;
-
-    int n_iter = ceil( dtphys / dt );
-    dt = dtphys / n_iter;
-
-    for (int iter = 0; iter < n_iter; iter++) {
-
-      #ifdef PAM_DEBUG
-        validate_array_positive(tracers);
-        validate_array_inf_nan(state);
-        validate_array_inf_nan(tracers);
-        std::vector<real> mass_init = compute_mass( coupler , state , tracers );
-      #endif
-
-      ScalarLiveOut<bool> neg_too_large(false);
-
-      // Loop over different items in the spatial splitting
-      for (int spl = 0 ; spl < space_op.numSplit() ; spl++) {
-        real dtloc = dt;
-
-        // Compute the tendencies for state and tracers
-        space_op.computeTendencies( state , stateTend , tracers , tracerTend , dtloc , spl );
-
-        parallel_for( "Temporal_ader.h apply tendencies" , SimpleBounds<4>(nz,ny,nx,nens) ,
-                      YAKL_LAMBDA (int k, int j, int i, int iens) {
-          for (int l=0; l < num_state; l++) {
-            state(l,hs+k,hs+j,hs+i,iens) += dtloc * stateTend(l,k,j,i,iens);
-          }
-          for (int l=0; l < num_tracers; l++) {
-            tracers(l,hs+k,hs+j,hs+i,iens) += dtloc * tracerTend(l,k,j,i,iens);
-            #ifdef PAM_DEBUG
-              if (tracers(l,hs+k,hs+j,hs+i,iens) < -1.e-10) {
-                neg_too_large = true;
-              }
-            #endif
-            tracers(l,hs+k,hs+j,hs+i,iens) = max( 0._fp , tracers(l,hs+k,hs+j,hs+i,iens) );
-          }
-        });
-      }
-
-      #ifdef PAM_DEBUG
-        if (neg_too_large.hostRead()) {
-          std::cerr << "WARNING: Correcting a non-machine-precision negative tracer value" << std::endl;
-          // endrun();
-        }
-        std::vector<real> mass_final = compute_mass( coupler , state , tracers );
-        for (int l=0; l < mass_final.size(); l++) {
-          real mass_diff;
-          if (mass_init[l] > 0) {
-            mass_diff = abs(mass_final[l] - mass_init[l]) / abs(mass_init[l]);
-          } else {
-            mass_diff = mass_final[l];
-          }
-          real tol = 1.e-12;
-          if (std::is_same<real,float>::value) {tol = 1.e-5;}
-          if (mass_diff > tol) {
-            std::cout << "Dycore mass change is too large. Abs Diff: " << abs(mass_final[l] - mass_init[l])
-                      << ";   Rel Diff: " << mass_diff
-                      << ";   Initial Mass: " << mass_init[l] << std::endl;
-            // endrun("ERROR: mass not conserved by dycore");
-          }
-        }
-        validate_array_positive(tracers);
-        validate_array_inf_nan(state);
-        validate_array_inf_nan(tracers);
-      #endif
-
-      space_op.switch_directions();
-
-    }
-
-    space_op.convert_dynamics_to_coupler_state( coupler , state , tracers );
-
-    etime += dtphys;
-  }
-
-
-  void finalize(pam::PamCoupler &coupler) { }
-
-
-  const char * dycore_name() const { return "AWFL"; }
-
-};
diff --git a/dynamics/awfl/Temporal_ssprk3_needs_update.h b/dynamics/awfl/Temporal_ssprk3_needs_update.h
deleted file mode 100644
index 87367412..00000000
--- a/dynamics/awfl/Temporal_ssprk3_needs_update.h
+++ /dev/null
@@ -1,243 +0,0 @@
-
-#pragma once
-
-#include "awfl_const.h"
-#include "DataManager.h"
-
-int  constexpr nTimeDerivs = 1;
-bool constexpr timeAvg     = false;
-int  constexpr nAder       = 1;
-
-template <class Spatial> class Temporal_operator {
-public:
-  static_assert(nTimeDerivs <= ngll , "ERROR: nTimeDerivs must be <= ngll.");
-
-  real5d stateTmp;
-  real5d tracersTmp;
-
-  real5d stateTend;
-  real5d tracerTend;
-
-  real5d stateTendAccum;
-  real5d tracerTendAccum;
-
-  Spatial space_op;
-
-  void init(std::string inFile, int ny, int nx, int nens, real xlen, real ylen, int num_tracers, DataManager &dm) {
-    space_op.init(inFile, ny, nx, nens, xlen, ylen, num_tracers, dm);
-    stateTmp        = space_op.createStateArr     ();
-    tracersTmp      = space_op.createTracerArr    ();
-
-    stateTend       = space_op.createStateTendArr ();
-    tracerTend      = space_op.createTracerTendArr();
-
-    stateTendAccum  = space_op.createStateTendArr ();
-    tracerTendAccum = space_op.createTracerTendArr();
-  }
-
-
-  template <class MICRO>
-  void convert_dynamics_to_coupler_state( DataManager &dm , MICRO &micro ) {
-    space_op.convert_dynamics_to_coupler_state( dm , micro );
-  }
-
-
-  template <class MICRO>
-  void convert_coupler_state_to_dynamics( DataManager &dm , MICRO &micro ) {
-    space_op.convert_coupler_state_to_dynamics( dm , micro );
-  }
-
-
-  int add_tracer(DataManager &dm , std::string name , std::string desc , bool pos_def , bool adds_mass) {
-    return space_op.add_tracer(dm , name , desc , pos_def , adds_mass);
-  }
-
-
-  template <class MICRO>
-  void init_state_and_tracers( DataManager &dm , MICRO const &micro ) {
-    space_op.init_state_and_tracers( dm , micro );
-  }
-
-
-  template <class F, class MICRO>
-  void init_tracer_by_location(std::string name , F const &init_mass , DataManager &dm, MICRO const &micro) const {
-    space_op.init_tracer_by_location(name , init_mass , dm, micro);
-  }
-
-
-  template <class MICRO>
-  void output(DataManager &dm, MICRO const &micro, real etime) const {
-    space_op.output(dm , micro , etime);
-  }
-
-
-  template <class MICRO>
-  real compute_time_step(DataManager &dm, MICRO const &micro, real cfl=0.8) {
-    return space_op.compute_time_step(dm, micro, cfl);
-  }
-
-
-  inline void zero_accum_arrays( real5d &stateTendAccum , real5d &tracerTendAccum ) {
-    int num_state   = stateTendAccum .dimension[0];
-    int nz          = stateTendAccum .dimension[1];
-    int ny          = stateTendAccum .dimension[2];
-    int nx          = stateTendAccum .dimension[3];
-    int nens        = stateTendAccum .dimension[4];
-
-    int num_tracers = tracerTendAccum.dimension[0];
-
-    parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-      for (int l=0; l < num_state; l++) {
-        stateTendAccum(l,k,j,i,iens) = 0;
-      }
-      for (int l=0; l < num_tracers; l++) {
-        tracerTendAccum(l,k,j,i,iens) = 0;
-      }
-    });
-  }
-
-
-  inline void tendency_accum( real5d &stateTendAccum  , real5d const &stateTend ,
-                              real5d &tracerTendAccum , real5d const &tracerTend ) {
-    int num_state   = stateTendAccum .dimension[0];
-    int nz          = stateTendAccum .dimension[1];
-    int ny          = stateTendAccum .dimension[2];
-    int nx          = stateTendAccum .dimension[3];
-    int nens        = stateTendAccum .dimension[4];
-
-    int num_tracers = tracerTendAccum.dimension[0];
-
-    parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-      for (int l=0; l < num_state; l++) {
-        stateTendAccum(l,k,j,i,iens) += stateTend(l,k,j,i,iens);
-      }
-      for (int l=0; l < num_tracers; l++) {
-        tracerTendAccum(l,k,j,i,iens) += tracerTend(l,k,j,i,iens);
-      }
-    });
-  }
-
-
-  template <class MICRO>
-  void timeStep( DataManager &dm , MICRO const &micro , real dtphys ) {
-    YAKL_SCOPE( stateTend       , this->stateTend       );
-    YAKL_SCOPE( stateTmp        , this->stateTmp        );
-    YAKL_SCOPE( stateTendAccum  , this->stateTendAccum  );
-    YAKL_SCOPE( tracerTend      , this->tracerTend      );
-    YAKL_SCOPE( tracersTmp      , this->tracersTmp      );
-    YAKL_SCOPE( tracerTendAccum , this->tracerTendAccum );
-
-    space_op.convert_coupler_state_to_dynamics( dm , micro );
-
-    real5d state   = dm.get<real,5>("dynamics_state");
-    real5d tracers = dm.get<real,5>("dynamics_tracers");
-
-    int nx          = space_op.nx;
-    int ny          = space_op.ny;
-    int nz          = space_op.nz;
-    int nens        = space_op.nens;
-    int num_state   = space_op.num_state;
-    int num_tracers = space_op.num_tracers;
-    int hs          = space_op.hs;
-
-    real dt = compute_time_step( dm , micro );
-
-    real loctime = 0.;
-    while (loctime < dtphys) {
-      if (loctime + dt > dtphys) { dt = dtphys - loctime; }
-
-      real dtloc = dt;
-
-      ScalarLiveOut<bool> neg_too_large(false);
-
-      /////////////////////////////////////
-      // Stage 1
-      /////////////////////////////////////
-      zero_accum_arrays( stateTendAccum , tracerTendAccum );
-      for (int spl = 0 ; spl < space_op.numSplit() ; spl++) {
-        space_op.computeTendencies( state , stateTend , tracers , tracerTend , micro , dtloc , spl );
-        tendency_accum( stateTendAccum , stateTend , tracerTendAccum , tracerTend );
-      }
-      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-        for (int l=0; l < num_state; l++) {
-          stateTmp  (l,hs+k,hs+j,hs+i,iens) = state  (l,hs+k,hs+j,hs+i,iens) + dtloc * stateTendAccum (l,k,j,i,iens);
-        }
-        for (int l=0; l < num_tracers; l++) {
-          tracersTmp(l,hs+k,hs+j,hs+i,iens) = tracers(l,hs+k,hs+j,hs+i,iens) + dtloc * tracerTendAccum(l,k,j,i,iens);
-          if (tracersTmp(l,hs+k,hs+j,hs+i,iens) < -1.e-10) {
-            neg_too_large = true;
-          }
-          tracersTmp(l,hs+k,hs+j,hs+i,iens) = max( 0._fp , tracersTmp(l,hs+k,hs+j,hs+i,iens) );
-        }
-      });
-
-      /////////////////////////////////////
-      // Stage 2
-      /////////////////////////////////////
-      zero_accum_arrays( stateTendAccum , tracerTendAccum );
-      for (int spl = 0 ; spl < space_op.numSplit() ; spl++) {
-        space_op.computeTendencies( stateTmp , stateTend , tracersTmp , tracerTend , micro , dtloc , spl );
-        tendency_accum( stateTendAccum , stateTend , tracerTendAccum , tracerTend );
-      }
-      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-        for (int l=0; l < num_state; l++) {
-          stateTmp(l,hs+k,hs+j,hs+i,iens) = 0.75_fp * state   (l,hs+k,hs+j,hs+i,iens) +
-                                            0.25_fp * stateTmp(l,hs+k,hs+j,hs+i,iens) +
-                                            0.25_fp * dtloc * stateTendAccum(l,k,j,i,iens);
-        }
-        for (int l=0; l < num_tracers; l++) {
-          tracersTmp(l,hs+k,hs+j,hs+i,iens) = 0.75_fp * tracers   (l,hs+k,hs+j,hs+i,iens) +
-                                              0.25_fp * tracersTmp(l,hs+k,hs+j,hs+i,iens) +
-                                              0.25_fp * dtloc * tracerTendAccum(l,k,j,i,iens);
-          if (tracersTmp(l,hs+k,hs+j,hs+i,iens) < -1.e-10) {
-            neg_too_large = true;
-          }
-          tracersTmp(l,hs+k,hs+j,hs+i,iens) = max( 0._fp , tracersTmp(l,hs+k,hs+j,hs+i,iens) );
-        }
-      });
-
-      /////////////////////////////////////
-      // Stage 3
-      /////////////////////////////////////
-      zero_accum_arrays( stateTendAccum , tracerTendAccum );
-      for (int spl = 0 ; spl < space_op.numSplit() ; spl++) {
-        space_op.computeTendencies( stateTmp , stateTend , tracersTmp , tracerTend , micro , dtloc , spl );
-        tendency_accum( stateTendAccum , stateTend , tracerTendAccum , tracerTend );
-      }
-      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-        for (int l=0; l < num_state; l++) {
-          state(l,hs+k,hs+j,hs+i,iens) = (1._fp/3._fp) * state   (l,hs+k,hs+j,hs+i,iens) +
-                                         (2._fp/3._fp) * stateTmp(l,hs+k,hs+j,hs+i,iens) +
-                                         (2._fp/3._fp) * dtloc * stateTendAccum(l,k,j,i,iens);
-        }
-        for (int l=0; l < num_tracers; l++) {
-          tracers(l,hs+k,hs+j,hs+i,iens) = (1._fp/3._fp) * tracers   (l,hs+k,hs+j,hs+i,iens) +
-                                           (2._fp/3._fp) * tracersTmp(l,hs+k,hs+j,hs+i,iens) +
-                                           (2._fp/3._fp) * dtloc * tracerTendAccum(l,k,j,i,iens);
-          if (tracers(l,hs+k,hs+j,hs+i,iens) < -1.e-10) {
-            neg_too_large = true;
-          }
-          tracers(l,hs+k,hs+j,hs+i,iens) = max( 0._fp , tracers(l,hs+k,hs+j,hs+i,iens) );
-        }
-      });
-
-      #ifdef PAM_DEBUG
-        if (neg_too_large.hostRead()) {
-          std::cerr << "WARNING: Correcting a non-machine-precision negative tracer value" << std::endl;
-          // endrun();
-        }
-      #endif
-
-      loctime += dt;
-    }
-
-    space_op.convert_dynamics_to_coupler_state( dm , micro );
-  }
-
-
-  void finalize(DataManager &dm) { }
-
-
-  const char * dycore_name() const { return "AWFL"; }
-
-};
diff --git a/dynamics/spam/Dycore.h b/dynamics/spam/Dycore.h
index 807c2cf3..81ac7d51 100644
--- a/dynamics/spam/Dycore.h
+++ b/dynamics/spam/Dycore.h
@@ -54,6 +54,8 @@ class Dycore {
   int ierr;
   real etime = 0.0;
   uint prevstep = 0;
+  int num_out = 0;
+  int num_stat = 0;
 
   std::unique_ptr<TimeIntegrator>
   choose_time_integrator(const std::string &tstype) {
@@ -74,8 +76,11 @@ class Dycore {
     }
   }
 
-  void init(PamCoupler &coupler) {
-    serial_print("setting up dycore", par.masterproc);
+  void init(PamCoupler &coupler, bool verbose=false) {
+    #ifdef PAM_STANDALONE
+      // only use this for standalone to avoid cluttering the log files
+      if (verbose) { serial_print("setting up dycore", par.masterproc); }
+    #endif
 
     // Set parameters
     debug_print(
@@ -83,9 +88,11 @@ class Dycore {
         par.masterproc);
 
     if (coupler.option_exists("standalone_input_file")) {
-      std::string inFile =
-          coupler.get_option<std::string>("standalone_input_file");
-      read_model_params_file(inFile, params, par, coupler, testcase);
+      #ifdef PAM_STANDALONE
+        std::string inFile =
+            coupler.get_option<std::string>("standalone_input_file");
+        read_model_params_file(inFile, params, par, coupler, testcase);
+      #endif
     } else {
       read_model_params_coupler(params, par, coupler, testcase);
     }
@@ -95,7 +102,7 @@ class Dycore {
     testcase->set_tracers(params);
     testcase->set_domain(params);
 
-    check_and_print_model_parameters(params, par);
+    check_and_print_model_parameters(params, par, verbose);
 
     debug_print("read parameters and partitioned domain/setting domain sizes",
                 par.masterproc);
@@ -111,7 +118,7 @@ class Dycore {
     debug_print("finish init topo/geom", par.masterproc);
 
     debug_print("start init equations", par.masterproc);
-    equations.initialize(coupler, params, primal_geometry, dual_geometry);
+    equations.initialize(coupler, params, primal_geometry, dual_geometry, verbose);
     debug_print("finish init equations", par.masterproc);
 
     debug_print("start init testcase", par.masterproc);
@@ -159,8 +166,7 @@ class Dycore {
   void pre_time_loop(PamCoupler &coupler) {
     // // Set the reference state and initialize the tendencies
     debug_print("start tendencies init", par.masterproc);
-    // the reference state has to be set before the tendencies are initialized
-    // for anelastic
+    // The anelastic ref state must be set before tendency initialization
     testcase->set_reference_state(primal_geometry, dual_geometry);
     tendencies.initialize(params, equations, primal_geometry, dual_geometry);
     debug_print("end tendencies init", par.masterproc);
@@ -189,9 +195,12 @@ class Dycore {
     }
     debug_print("end ts init", par.masterproc);
 
-    // convert dynamics state to Coupler state
-    tendencies.convert_dynamics_to_coupler_state(coupler, prognostic_vars,
-                                                 constant_vars);
+    // TODO: add logic here to only include this for standlone configurations
+    // // convert dynamics state to Coupler state
+    // if (testcase->set_coupler_state)
+    //  { tendencies.convert_dynamics_to_coupler_state(coupler, prognostic_vars, constant_vars, params.couple_wind, params.couple_wind_exact_inverse); }
+    // if (testcase->set_coupler_state)
+    //  { tendencies.convert_dynamics_to_coupler_state(coupler, prognostic_vars, constant_vars); }
 
     // Output the initial model state
 #ifndef PAMC_NOIO
@@ -199,9 +208,12 @@ class Dycore {
     for (auto &diag : diagnostics) {
       diag->compute(0, constant_vars, prognostic_vars);
     }
-    stats.compute(prognostic_vars, constant_vars, 0);
-    io.outputInit(etime, primal_geometry, dual_geometry, params);
-    io.outputStats(stats);
+    if (params.stat_freq >=0.)
+    {stats.compute(prognostic_vars, constant_vars, 0);
+    io.outputStats(stats);}
+
+    if (params.out_freq >=0.)
+    {io.outputInit(etime);}
     debug_print("end initial io", par.masterproc);
 #endif
     prevstep = 1;
@@ -215,16 +227,20 @@ class Dycore {
   void timeStep(PamCoupler &coupler) {
     yakl::timer_start("timeStep");
 
-    serial_print("taking a dycore dtphys step", par.masterproc);
+    // serial_print("taking a dycore dtphys step", par.masterproc);
 
     // convert Coupler state to dynamics state
     tendencies.convert_coupler_to_dynamics_state(coupler, prognostic_vars,
-                                                 auxiliary_vars, constant_vars);
+                                                 auxiliary_vars,
+                                                 constant_vars,
+                                                 params.couple_wind,
+                                                 params.couple_wind_exact_inverse);
 
     // Time stepping loop
     debug_print("start time stepping loop", par.masterproc);
     for (uint nstep = 0; nstep < params.crm_per_phys; nstep++) {
       yakl::fence();
+
       time_integrator->step_forward(params.dtcrm);
 
 #ifdef PAMC_CHECK_ANELASTIC_CONSTRAINT
@@ -245,12 +261,13 @@ class Dycore {
           diag->compute(etime, constant_vars, prognostic_vars);
         }
         io.output(etime);
-        io.outputStats(stats);
+        if (params.stat_freq >= 0.) {io.outputStats(stats);}
+        num_out++;
       }
 
-      if ((nstep + prevstep) % params.Nstat == 0) {
-        stats.compute(prognostic_vars, constant_vars,
-                      (nstep + prevstep) / params.Nstat);
+      if (params.stat_freq >= 0. && etime / params.stat_freq >= num_stat+1) {
+        stats.compute(prognostic_vars, constant_vars, etime / params.stat_freq);
+        num_stat++;
       }
 #endif
     }
@@ -262,7 +279,9 @@ class Dycore {
 
     // convert dynamics state to Coupler state
     tendencies.convert_dynamics_to_coupler_state(coupler, prognostic_vars,
-                                                 constant_vars);
+                                                 constant_vars,
+                                                 params.couple_wind,
+                                                 params.couple_wind_exact_inverse);
 
     // ADD COUPLER DYCORE FUNCTIONS HERE AS WELL
 
diff --git a/dynamics/spam/src/common.h b/dynamics/spam/src/common.h
index 12575cc3..4bddc28b 100644
--- a/dynamics/spam/src/common.h
+++ b/dynamics/spam/src/common.h
@@ -4,7 +4,6 @@
 #include <iostream>
 //#include <cstring>
 #include "mpi.h"
-#include "yaml-cpp/yaml.h"
 #include <array>
 #include <complex>
 #include <extensions/YAKL_fft.h>
@@ -13,9 +12,12 @@
 #include <optional>
 #include <sstream>
 #include <string>
-#ifdef YAKL_ARCH_CUDA
-#include <cuda/std/complex>
+#ifdef PAM_STANDALONE
+#include "yaml-cpp/yaml.h"
 #endif
+// #ifdef YAKL_ARCH_CUDA
+// #include <cuda/std/complex>
+// #endif
 
 namespace pamc {
 
@@ -31,11 +33,12 @@ using uint = unsigned int;
 // Declaring the precision for the model
 using real = double;
 
-#ifdef YAKL_ARCH_CUDA
-using complex = cuda::std::complex<real>;
-#else
+// #if defined YAKL_ARCH_CUDA
+// #include <cuda/std/complex>
+// using complex = cuda::std::complex<real>;
+// #else
 using complex = std::complex<real>;
-#endif
+// #endif
 
 using complex5d = yakl::Array<complex, 5, yakl::memDevice, yakl::styleC>;
 using complex4d = yakl::Array<complex, 4, yakl::memDevice, yakl::styleC>;
diff --git a/dynamics/spam/src/core/params.h b/dynamics/spam/src/core/params.h
index aae76a8b..4b3fb784 100644
--- a/dynamics/spam/src/core/params.h
+++ b/dynamics/spam/src/core/params.h
@@ -13,13 +13,14 @@ class Parameters {
   int nz_dual = -1;
   int nens = -1;
 
-  int simSteps = -1;
-  int Nsteps = -1;
-  int Nout = -1;
+  int nsteps_gcm = -1;
+  real out_freq = -1.;
+  real stat_freq = -1.;
+  real dt_gcm = -1.;
   real dtcrm = -1.;
-  real dtphys = -1.;
+  real dt_crm_phys = -1.;
   int crm_per_phys = -1;
-  int Nstat = -1;
+  int statSize = -1;
   std::string outputName;
   std::string tstype;
   real si_tolerance = -1;
@@ -36,40 +37,58 @@ class Parameters {
 
   int masterproc;
   bool inner_mpi;
+
+  bool couple_wind = true;
+  bool couple_wind_exact_inverse = false;
+
 };
 
 void readParamsFile(std::string inFile, Parameters &params, Parallel &par,
                     int nz) {
-  // Read config file
-  YAML::Node config = YAML::LoadFile(inFile);
-
-  params.inner_mpi = config["inner_mpi"].as<bool>(false);
-  params.nx_glob = config["crm_nx"].as<int>();
-  params.ny_glob = config["crm_ny"].as<int>();
-  par.nprocx = config["nprocx"].as<int>();
-  par.nprocy = config["nprocy"].as<int>();
-  params.nens = config["nens"].as<int>();
-  params.dtphys = config["dtphys"].as<real>();
-  params.crm_per_phys = config["crm_per_phys"].as<int>(0);
-  params.Nout = config["outSteps"].as<int>(0);
-  params.Nstat = config["statSteps"].as<int>(0);
-  params.simSteps = config["simSteps"].as<int>(0);
-  params.tstype = config["tstype"].as<std::string>();
-  params.si_tolerance = config["si_tolerance"].as<real>(1e-8);
-  params.si_monitor_convergence = config["si_monitor_convergence"].as<int>(2);
-  params.si_verbosity_level =
-      config["si_verbosity_level"].as<int>(params.si_monitor_convergence);
-  params.si_max_iters = config["si_max_iters"].as<int>(
-      params.si_monitor_convergence > 1 ? 50 : 5);
-  params.si_nquad = config["si_nquad"].as<int>(4);
-  params.si_two_point_discrete_gradient =
-      config["si_two_point_discrete_gradient"].as<bool>(false);
-  params.tanh_upwind_coeff = config["tanh_upwind_coeff"].as<real>(250);
-  params.outputName = config["dycore_out_prefix"].as<std::string>("output");
-  params.nz_dual = nz;
-  params.Nsteps = params.simSteps * params.crm_per_phys;
-
-  params.dtcrm = params.dtphys / params.crm_per_phys;
+  #ifdef PAM_STANDALONE
+    // Read config file
+    YAML::Node config = YAML::LoadFile(inFile);
+
+    params.inner_mpi = config["inner_mpi"].as<bool>(false);
+    params.nx_glob = config["crm_nx"].as<int>();
+    params.ny_glob = config["crm_ny"].as<int>();
+    par.nprocx = config["nprocx"].as<int>();
+    par.nprocy = config["nprocy"].as<int>();
+    params.nens = config["nens"].as<int>();
+    params.dt_gcm = config["dt_gcm"].as<real>();
+    params.dt_crm_phys = config["dt_crm_phys"].as<real>();
+    params.crm_per_phys = config["crm_per_phys"].as<int>();
+    params.nsteps_gcm = config["nsteps_gcm"].as<int>();
+    params.out_freq = config["out_freq"].as<real>(-1.);
+    params.stat_freq = config["stat_freq"].as<real>(-1.);
+    params.tstype = config["tstype"].as<std::string>();
+    params.si_tolerance = config["si_tolerance"].as<real>(1e-8);
+    params.si_monitor_convergence = config["si_monitor_convergence"].as<int>(2);
+    params.si_verbosity_level =
+        config["si_verbosity_level"].as<int>(params.si_monitor_convergence);
+    params.si_max_iters = config["si_max_iters"].as<int>(
+        params.si_monitor_convergence > 1 ? 50 : 5);
+    params.si_nquad = config["si_nquad"].as<int>(4);
+    params.si_two_point_discrete_gradient =
+        config["si_two_point_discrete_gradient"].as<bool>(false);
+    params.tanh_upwind_coeff = config["tanh_upwind_coeff"].as<real>(250);
+    params.outputName = config["dycore_out_prefix"].as<std::string>("output");
+    params.nz_dual = nz;
+
+    params.couple_wind = config["couple_wind"].as<bool>(true);
+    params.couple_wind_exact_inverse = config["couple_wind_exact_inverse"].as<bool>(false);
+
+  //ADD A CHECK HERE THAT TOTAL TIME IS EXACTLY DIVISIBLE BY STAT_FREQ
+    if (params.stat_freq >= 0.)
+    {
+    real total_time = params.nsteps_gcm * params.dt_gcm;
+    params.statSize = total_time / params.stat_freq;
+    }
+    else
+    {params.statSize = 0;}
+
+    params.dtcrm = params.dt_crm_phys / params.crm_per_phys;
+  #endif
 }
 
 void read_params_coupler(Parameters &params, Parallel &par,
@@ -81,12 +100,15 @@ void read_params_coupler(Parameters &params, Parallel &par,
   par.nprocx = 1;
   par.nprocy = 1;
   params.nens = coupler.get_nens();
-  params.dtphys = coupler.get_option<real>("crm_dt");
-  params.crm_per_phys = 1;
+//THESE NAMES IN COUPLER DONT CORRESPOND WITH CONFIG FILE NAMES
+//FIX THIS
+  params.dt_crm_phys = coupler.get_option<real>("crm_dt");
+  if (coupler.option_exists("crm_dyn_per_phys")) {
+    params.crm_per_phys = coupler.get_option<int>("crm_dyn_per_phys");
+  } else {
+    params.crm_per_phys = 1;
+  }
 
-  params.Nout = 1;
-  params.Nstat = 1;
-  params.simSteps = 1;
 #ifdef PAMC_MAN
   params.tstype = "ssprk3";
 #else
@@ -101,9 +123,8 @@ void read_params_coupler(Parameters &params, Parallel &par,
   params.tanh_upwind_coeff = 250;
   params.outputName = "pamc_output";
   params.nz_dual = coupler.get_nz();
-  params.Nsteps = params.simSteps * params.crm_per_phys;
-
-  params.dtcrm = params.dtphys / params.crm_per_phys;
+  params.statSize = 0;
+  params.dtcrm = params.dt_crm_phys / params.crm_per_phys;
 }
 
 void finalize_parallel(Parameters &params, Parallel &par) {
@@ -201,17 +222,10 @@ void finalize_parallel(Parameters &params, Parallel &par) {
 #endif
 }
 
-void check_and_print_parameters(const Parameters &params, const Parallel &par) {
-  // Check time stepping params
-  if (not(params.dtphys > 0.0_fp) or not(params.simSteps > 0) or
-      not(params.crm_per_phys > 0) or not(params.Nout > 0) or
-      not(params.Nstat > 0)) {
-    endrun("spam++ must use step-based time control logic ie set simSteps >0, "
-           "dtphys>0, crm_per_phys >0, outSteps >0, statSteps >0");
-  }
+void check_and_print_parameters(const Parameters &params, const Parallel &par, bool verbose=false) {
 
   // Print out the values
-  if (par.masterproc) {
+  if (par.masterproc and verbose) {
     std::cout << "nx:         " << params.nx_glob << "\n";
     std::cout << "ny:         " << params.ny_glob << "\n";
     std::cout << "nl dual:         " << params.nz_dual << "\n";
@@ -222,11 +236,12 @@ void check_and_print_parameters(const Parameters &params, const Parallel &par) {
     std::cout << "haloy:      " << par.haloy << "\n";
 
     std::cout << "dtcrm:         " << params.dtcrm << "\n";
-    std::cout << "dtphys:         " << params.dtphys << "\n";
-    std::cout << "Nsteps:     " << params.Nsteps << "\n";
-    std::cout << "simSteps:     " << params.simSteps << "\n";
+    std::cout << "dt_crm_phys:         " << params.dt_crm_phys << "\n";
+    std::cout << "statSize:     " << params.statSize << "\n";
+    std::cout << "nsteps_gcm:     " << params.nsteps_gcm << "\n";
     std::cout << "crm per phys:     " << params.crm_per_phys << "\n";
-    std::cout << "Nout:       " << params.Nout << "\n";
+    std::cout << "out_freq:       " << params.out_freq << "\n";
+    std::cout << "stat_freq:       " << params.stat_freq << "\n";
     std::cout << "outputName: " << params.outputName << "\n";
 
     std::cout << "nranks:     " << par.nranks << "\n";
diff --git a/dynamics/spam/src/extrudedmodel-common.h b/dynamics/spam/src/extrudedmodel-common.h
index 4b8e0617..f1a4900e 100644
--- a/dynamics/spam/src/extrudedmodel-common.h
+++ b/dynamics/spam/src/extrudedmodel-common.h
@@ -10,9 +10,9 @@ uint constexpr ntracers_dycore = 0;
 //////////////////////////////////////////////////////////////////////////////
 
 // for debugging anelastic
-#if defined PAMC_AN || defined PAMC_MAN
-#define PAMC_CHECK_ANELASTIC_CONSTRAINT
-#endif
+// #if defined PAMC_AN || defined PAMC_MAN
+// #define PAMC_CHECK_ANELASTIC_CONSTRAINT
+// #endif
 
 // Number of Dimensions
 uint constexpr ndims = 1;
diff --git a/dynamics/spam/src/hamiltonians/anelastic.h b/dynamics/spam/src/hamiltonians/anelastic.h
index 4df5eea5..60dd83ef 100644
--- a/dynamics/spam/src/hamiltonians/anelastic.h
+++ b/dynamics/spam/src/hamiltonians/anelastic.h
@@ -222,7 +222,7 @@ class Hamiltonian_MAN_Hs {
     const real qv = varset.get_qv(dens, k, j, i, ks, js, is, n);
     real ql = 0.0_fp;
     real qi = 0.0_fp;
-    if (varset.liquid_found) {
+    if (varset.liq_found) {
       ql = varset.get_ql(dens, k, j, i, ks, js, is, n);
     }
     if (varset.ice_found) {
@@ -271,7 +271,7 @@ class Hamiltonian_MAN_Hs {
     if (!ThermoPotential::moist_species_decouple_from_dynamics) {
       B(varset.active_id_vap) =
           generalized_chemical_potential_v - generalized_chemical_potential_d;
-      if (varset.liquid_found) {
+      if (varset.liq_found) {
         B(varset.active_id_liq) =
             generalized_chemical_potential_l - generalized_chemical_potential_d;
       }
@@ -314,7 +314,7 @@ class Hamiltonian_MAN_Hs {
     l_q(1) = varset.get_entropic_var(dens, k, j, i, ks, js, is, n);
     l_q(2) = varset.get_qd(dens, k, j, i, ks, js, is, n);
     l_q(3) = varset.get_qv(dens, k, j, i, ks, js, is, n);
-    l_q(4) = varset.liquid_found ? varset.get_ql(dens, k, j, i, ks, js, is, n)
+    l_q(4) = varset.liq_found ? varset.get_ql(dens, k, j, i, ks, js, is, n)
                                  : 0.0_fp;
     l_q(5) =
         varset.ice_found ? varset.get_qi(dens, k, j, i, ks, js, is, n) : 0.0_fp;
diff --git a/dynamics/spam/src/hamiltonians/compressible_euler.h b/dynamics/spam/src/hamiltonians/compressible_euler.h
index 5a778185..5dfbd38b 100644
--- a/dynamics/spam/src/hamiltonians/compressible_euler.h
+++ b/dynamics/spam/src/hamiltonians/compressible_euler.h
@@ -246,7 +246,7 @@ class Hamiltonian_MCE_Hs {
     real qv = varset.get_qv(dens, k, j, i, ks, js, is, n);
     real ql = 0.0_fp;
     real qi = 0.0_fp;
-    if (varset.liquid_found) {
+    if (varset.liq_found) {
       ql = varset.get_ql(dens, k, j, i, ks, js, is, n);
     }
     if (varset.ice_found) {
@@ -339,7 +339,7 @@ class Hamiltonian_MCE_Hs {
     if (!ThermoPotential::moist_species_decouple_from_dynamics) {
       B(varset.active_id_vap) =
           generalized_chemical_potential_v - generalized_chemical_potential_d;
-      if (varset.liquid_found) {
+      if (varset.liq_found) {
         B(varset.active_id_liq) =
             generalized_chemical_potential_l - generalized_chemical_potential_d;
       }
@@ -374,7 +374,7 @@ class Hamiltonian_MCE_Hs {
     l_q(1) = varset.get_entropic_var(dens, k, j, i, ks, js, is, n);
     l_q(2) = varset.get_qd(dens, k, j, i, ks, js, is, n);
     l_q(3) = varset.get_qv(dens, k, j, i, ks, js, is, n);
-    l_q(4) = varset.liquid_found ? varset.get_ql(dens, k, j, i, ks, js, is, n)
+    l_q(4) = varset.liq_found ? varset.get_ql(dens, k, j, i, ks, js, is, n)
                                  : 0.0_fp;
     l_q(5) =
         varset.ice_found ? varset.get_qi(dens, k, j, i, ks, js, is, n) : 0.0_fp;
diff --git a/dynamics/spam/src/hamiltonians/thermo.h b/dynamics/spam/src/hamiltonians/thermo.h
index f12b6c61..4976167f 100644
--- a/dynamics/spam/src/hamiltonians/thermo.h
+++ b/dynamics/spam/src/hamiltonians/thermo.h
@@ -111,18 +111,24 @@ class ThermoNone {
   real YAKL_INLINE compute_alpha(real p, real T, real qd, real qv, real ql,
                                  real qi) const {};
 
-  real YAKL_INLINE compute_entropic_var(real p, real T, real qd, real qv,
-                                        real ql, real qi) const {};
 
   real YAKL_INLINE solve_p(real rho, real entropic_var, real qd, real qv,
                            real ql, real qi) const {};
 
-  real YAKL_INLINE compute_T(real alpha, real entropic_var, real qd, real qv,
+  real YAKL_INLINE compute_T_from_alpha(real alpha, real entropic_var, real qd, real qv,
+                             real ql, real qi) const {};
+
+  real YAKL_INLINE compute_T_from_p(real p, real entropic_var, real qd, real qv,
                              real ql, real qi) const {};
 
-  real YAKL_INLINE compute_entropic_var_from_T(real alpha, real T, real qd,
+  real YAKL_INLINE compute_entropic_var_from_T_alpha(real alpha, real T, real qd,
+                                               real qv, real ql,
+                                               real qi) const {};
+
+  real YAKL_INLINE compute_entropic_var_from_T_p(real p, real T, real qd,
                                                real qv, real ql,
                                                real qi) const {};
+
   real YAKL_INLINE compute_dpdentropic_var(real alpha, real entropic_var,
                                            real qd, real qv, real ql,
                                            real qi) const {};
@@ -214,8 +220,10 @@ class IdealGas_Pottemp {
     return cst.Rd * T / p;
   };
 
-  real YAKL_INLINE compute_entropic_var(real p, real T, real qd, real qv,
+  real YAKL_INLINE compute_entropic_var_from_T_p(real p, real T, real qd, real qv,
                                         real ql, real qi) const {
+    //real entropy =  cst.Cpd * log(T / cst.Tr) - cst.Rd * log(p / cst.pr);
+    //return theta(entropy);
     return T * pow(cst.pr / p, cst.kappa_d);
   };
 
@@ -224,14 +232,19 @@ class IdealGas_Pottemp {
     return cst.pr * pow(entropic_var * rho * cst.Rd / cst.pr, cst.gamma_d);
   };
 
-  real YAKL_INLINE compute_T(real alpha, real entropic_var, real qd, real qv,
+  real YAKL_INLINE compute_T_from_alpha(real alpha, real entropic_var, real qd, real qv,
                              real ql, real qi) const {
     real p =
         cst.pr * pow(entropic_var * cst.Rd / (alpha * cst.pr), cst.gamma_d);
     return alpha * p / cst.Rd;
   }
 
-  real YAKL_INLINE compute_entropic_var_from_T(real alpha, real T, real qd,
+  real YAKL_INLINE compute_T_from_p(real p, real entropic_var, real qd, real qv,
+                             real ql, real qi) const {
+  return pow(p / cst.pr, cst.kappa_d) * entropic_var;
+};
+
+  real YAKL_INLINE compute_entropic_var_from_T_alpha(real alpha, real T, real qd,
                                                real qv, real ql,
                                                real qi) const {
     real p = cst.Rd * T / alpha;
@@ -345,7 +358,7 @@ class IdealGas_Entropy {
     return cst.Rd * T / p;
   };
 
-  real YAKL_INLINE compute_entropic_var(real p, real T, real qd, real qv,
+  real YAKL_INLINE compute_entropic_var_from_T_p(real p, real T, real qd, real qv,
                                         real ql, real qi) const {
     return cst.Cpd * log(T / cst.Tr) - cst.Rd * log(p / cst.pr);
   };
@@ -357,20 +370,28 @@ class IdealGas_Entropy {
     return cst.Rd / cst.Cvd * U / alpha;
   };
 
-  real YAKL_INLINE compute_entropic_var_from_T(real alpha, real T, real qd,
+  real YAKL_INLINE compute_entropic_var_from_T_alpha(real alpha, real T, real qd,
                                                real qv, real ql,
                                                real qi) const {
+    //real alpha_r = cst.Rd * cst.Tr / cst.pr;
+    //cst.Cvd * log(T / cst.Tr) + cst.Rd * log(alpha / alpha_r);
+
     real p = cst.Rd * T / alpha;
-    return compute_entropic_var(p, T, qd, qv, ql, qi);
+    return compute_entropic_var_from_T_p(p, T, qd, qv, ql, qi);
   }
 
-  real YAKL_INLINE compute_T(real alpha, real entropic_var, real qd, real qv,
+  real YAKL_INLINE compute_T_from_alpha(real alpha, real entropic_var, real qd, real qv,
                              real ql, real qi) const {
 
     real U = compute_U(alpha, entropic_var, qd, qv, ql, qi);
     return U / cst.Cvd;
   }
 
+  real YAKL_INLINE compute_T_from_p(real p, real entropic_var, real qd, real qv,
+                             real ql, real qi) const {
+compute_dHdentropic_var(p,entropic_var,qd,qv,ql,qi);
+};
+
   real YAKL_INLINE compute_dpdentropic_var(real alpha, real entropic_var,
                                            real qd, real qv, real ql,
                                            real qi) const {
@@ -476,7 +497,8 @@ class ConstantKappa_VirtualPottemp {
     return (qd * cst.Rd + qv * cst.Rv) * T / p;
   };
 
-  real YAKL_INLINE compute_entropic_var(real p, real T, real qd, real qv,
+
+  real YAKL_INLINE compute_entropic_var_from_T_p(real p, real T, real qd, real qv,
                                         real ql, real qi) const {
     return (qd * cst.Rd + qv * cst.Rv) * T / cst.Rd *
            pow(cst.pr / p, cst.kappa_d);
@@ -487,7 +509,7 @@ class ConstantKappa_VirtualPottemp {
     return cst.pr * pow(entropic_var * rho * cst.Rd / cst.pr, cst.gamma_d);
   };
 
-  real YAKL_INLINE compute_T(real alpha, real entropic_var, real qd, real qv,
+  real YAKL_INLINE compute_T_from_alpha(real alpha, real entropic_var, real qd, real qv,
                              real ql, real qi) const {
     real Rstar = cst.Rd * qd + cst.Rv * qv;
     real p =
@@ -495,7 +517,14 @@ class ConstantKappa_VirtualPottemp {
     return alpha * p / Rstar;
   };
 
-  real YAKL_INLINE compute_entropic_var_from_T(real alpha, real T, real qd,
+
+  real YAKL_INLINE compute_T_from_p(real p, real entropic_var, real qd, real qv,
+                             real ql, real qi) const {
+  real Rstar = cst.Rd * qd + cst.Rv * qv;
+  return pow(p / cst.pr, cst.kappa_d) * entropic_var * cst.Rd / Rstar;
+};
+
+  real YAKL_INLINE compute_entropic_var_from_T_alpha(real alpha, real T, real qd,
                                                real qv, real ql,
                                                real qi) const {
     real Rstar = cst.Rd * qd + cst.Rv * qv;
@@ -570,7 +599,7 @@ class ConstantKappa_Entropy {
   real YAKL_INLINE compute_alpha(real p, real T, real qd, real qv, real ql,
                                  real qi) const {};
 
-  real YAKL_INLINE compute_entropic_var(real p, real T, real qd, real qv,
+  real YAKL_INLINE compute_entropic_var_from_T_p(real p, real T, real qd, real qv,
                                         real ql, real qi) const {};
 
   real YAKL_INLINE solve_p(real rho, real entropic_var, real qd, real qv,
@@ -627,7 +656,7 @@ class Unapprox_Pottemp {
   real YAKL_INLINE compute_alpha(real p, real T, real qd, real qv, real ql,
                                  real qi) const {};
 
-  real YAKL_INLINE compute_entropic_var(real p, real T, real qd, real qv,
+  real YAKL_INLINE compute_entropic_var_from_T_p(real p, real T, real qd, real qv,
                                         real ql, real qi) const {};
 
   real YAKL_INLINE solve_p(real rho, real entropic_var, real qd, real qv,
@@ -684,7 +713,7 @@ class Unapprox_Entropy {
   real YAKL_INLINE compute_alpha(real p, real T, real qd, real qv, real ql,
                                  real qi) const {};
 
-  real YAKL_INLINE compute_entropic_var(real p, real T, real qd, real qv,
+  real YAKL_INLINE compute_entropic_var_from_T_p(real p, real T, real qd, real qv,
                                         real ql, real qi) const {};
 
   real YAKL_INLINE solve_p(real rho, real entropic_var, real qd, real qv,
diff --git a/dynamics/spam/src/hamiltonians/variableset.h b/dynamics/spam/src/hamiltonians/variableset.h
index 7c385b14..a812864f 100644
--- a/dynamics/spam/src/hamiltonians/variableset.h
+++ b/dynamics/spam/src/hamiltonians/variableset.h
@@ -171,7 +171,7 @@ template <class T> class VariableSetBase : public T {
   int active_id_ice = std::numeric_limits<int>::min();
 
   bool ice_found = false;
-  bool liquid_found = false;
+  bool liq_found = false;
 
   ThermoPotential thermo;
   ReferenceState reference_state;
@@ -182,15 +182,17 @@ template <class T> class VariableSetBase : public T {
                   const ThermoPotential &thermodynamics,
                   const ReferenceState &refstate,
                   const Geometry<Straight> &primal_geom,
-                  const Geometry<Twisted> &dual_geom);
+                  const Geometry<Twisted> &dual_geom,
+                  bool verbose=false);
   static void initialize(VariableSetBase &varset, PamCoupler &coupler,
                          ModelParameters &params,
                          const ThermoPotential &thermodynamics,
                          const ReferenceState &refstate,
                          const Geometry<Straight> &primal_geom,
-                         const Geometry<Twisted> &dual_geom) {
+                         const Geometry<Twisted> &dual_geom,
+                         bool verbose=false) {
 
-    if (T::couple && couple_wind_exact_inverse) {
+    if (T::couple && params.couple_wind_exact_inverse) {
       if (primal_geom.topology.n_cells_x % 2 == 0) {
         throw std::runtime_error(
             "The number of crm cells in the horizontal "
@@ -202,10 +204,11 @@ template <class T> class VariableSetBase : public T {
     varset.primal_geometry = primal_geom;
     varset.dual_geometry = dual_geom;
 
-    // If more physics parameterizations are added this logic might need to
-    // change
-    varset.couple_wind = !(coupler.get_option<std::string>("sgs") == "none") ||
-                         !(coupler.option_exists("standalone_input_file"));
+    // Need different logic for this.
+    // maybe a coupler option so that the driver can set it directly??
+    //varset.couple_wind = !(coupler.get_option<std::string>("sgs") == "none") ||
+    //                     !(coupler.option_exists("standalone_input_file"));
+    varset.couple_wind = true;
 
     for (int l = ndensity_dycore_prognostic; l < ndensity_nophysics; l++) {
       varset.dens_pos(l) =
@@ -240,7 +243,7 @@ template <class T> class VariableSetBase : public T {
           tracer_names_loc[tr] == std::string("cloud_water")) {
         varset.dm_id_liq = tr;
         varset.dens_id_liq = ndensity_nophysics + tr;
-        varset.liquid_found = true;
+        varset.liq_found = true;
         if (!ThermoPotential::moist_species_decouple_from_dynamics) {
           varset.dens_active(tr + ndensity_nophysics) = true;
         }
@@ -272,7 +275,7 @@ template <class T> class VariableSetBase : public T {
         if (varset.ice_found && i == ndensity_nophysics + varset.dm_id_ice) {
           varset.active_id_ice = active_i;
         }
-        if (varset.liquid_found && i == ndensity_nophysics + varset.dm_id_liq) {
+        if (varset.liq_found && i == ndensity_nophysics + varset.dm_id_liq) {
           varset.active_id_liq = active_i;
         }
         active_i++;
@@ -286,20 +289,22 @@ template <class T> class VariableSetBase : public T {
           "Dycore Tracer" + std::to_string(i - ndensity_dycore_prognostic);
     }
 
-    serial_print("PAM-C densities", params.masterproc);
-    for (int i = 0; i < ndensity; i++) {
-      std::stringstream ss;
-      ss << std::left;
-      ss << std::setw(4) << i;
-      ss << std::setw(21) << varset.dens_name[i].substr(0, 19);
-      ss << std::setw(29) << varset.dens_desc[i].substr(0, 27);
-      ss << std::setw(12) << (varset.dens_prognostic(i) ? "prognostic" : "");
-      ss << std::setw(8) << (varset.dens_active(i) ? "active" : "");
-      ss << std::setw(10)
-         << ((varset.dens_prognostic(i) && varset.dens_pos(i)) ? "positive"
-                                                               : "");
-
-      serial_print(ss.str(), params.masterproc);
+    if (verbose) {
+      serial_print("PAM-C densities", params.masterproc);
+      for (int i = 0; i < ndensity; i++) {
+        std::stringstream ss;
+        ss << std::left;
+        ss << std::setw(4) << i;
+        ss << std::setw(21) << varset.dens_name[i].substr(0, 19);
+        ss << std::setw(29) << varset.dens_desc[i].substr(0, 27);
+        ss << std::setw(12) << (varset.dens_prognostic(i) ? "prognostic" : "");
+        ss << std::setw(8) << (varset.dens_active(i) ? "active" : "");
+        ss << std::setw(10)
+           << ((varset.dens_prognostic(i) && varset.dens_pos(i)) ? "positive"
+                                                                 : "");
+
+        serial_print(ss.str(), params.masterproc);
+      }
     }
 
     yakl::fence();
@@ -341,37 +346,100 @@ template <class T> class VariableSetBase : public T {
   real YAKL_INLINE _water_dens(const real3d &densvar, int k, int ks,
                                int n) const {};
 
-  void convert_dynamics_to_coupler_state(PamCoupler &coupler,
+  real YAKL_INLINE get_pres(const real5d &densvar, int k,
+                                                   int j, int i, int ks, int js,
+                                                   int is, int n) const {};
+  real YAKL_INLINE get_pres(int k, int ks, int n) const {};
+  real YAKL_INLINE get_ref_dens(int k, int ks, int n) const {};
+
+  void pamc_debug_chk(int id, PamCoupler &coupler,
+                                         const FieldSet<nprognostic> &prog_vars,
+                                         const FieldSet<nprognostic> &prev_vars);
+
+  void convert_dynamics_to_coupler_densities(PamCoupler &coupler,
                                          const FieldSet<nprognostic> &prog_vars,
                                          const FieldSet<nconstant> &const_vars);
-  void convert_coupler_to_dynamics_state(PamCoupler &coupler,
+  void convert_dynamics_to_coupler_wind(PamCoupler &coupler,
+                                         const FieldSet<nprognostic> &prog_vars,
+                                         const FieldSet<nconstant> &const_vars, bool couple_wind_exact_inverse);
+  void convert_dynamics_to_coupler_staggered_wind(PamCoupler &coupler,
+                                         const FieldSet<nprognostic> &prog_vars,
+                                         const FieldSet<nconstant> &const_vars);
+  void convert_coupler_to_dynamics_densities(PamCoupler &coupler,
+                                         FieldSet<nprognostic> &prog_vars,
+                                         const FieldSet<nconstant> &const_vars);
+  void convert_coupler_to_dynamics_wind(PamCoupler &coupler,
+                                         FieldSet<nprognostic> &prog_vars,
+                                         const FieldSet<nconstant> &const_vars, bool couple_wind_exact_inverse);
+  void convert_coupler_to_dynamics_staggered_wind(PamCoupler &coupler,
                                          FieldSet<nprognostic> &prog_vars,
                                          const FieldSet<nconstant> &const_vars);
 };
 
+//THIS IS ANELASTIC SPECIFIC FOR NOW, IDEALLY IT SHOULD MADE TO WORK FOR EITHER AN OR COMPRESSIBLE
 template <class T>
-void VariableSetBase<T>::convert_dynamics_to_coupler_state(
+void VariableSetBase<T>::pamc_debug_chk(int id,
+                                        PamCoupler &coupler,
+                                        const FieldSet<nprognostic> &prog_vars,
+                                        const FieldSet<nprognostic> &prev_vars)
+{
+  const auto &primal_topology = primal_geometry.topology;
+  const auto &dual_topology = dual_geometry.topology;
+  int dis = dual_topology.is;
+  int djs = dual_topology.js;
+  int dks = dual_topology.ks;
+  int pis = primal_topology.is;
+  int pjs = primal_topology.js;
+  int pks = primal_topology.ks;
+  // const auto densvar = prog_vars.fields_arr[DENSVAR].data;
+  parallel_for("pamc_debug_chk",SimpleBounds<4>(dual_topology.nl, dual_topology.n_cells_y, dual_topology.n_cells_x, dual_topology.nens), YAKL_CLASS_LAMBDA(int k, int j, int i, int n) {
+
+    const auto densvar1 = prev_vars.fields_arr[DENSVAR].data;
+    real entr1 = get_entropic_var( densvar1, k, j, i, dks, djs, dis, n);
+    real pres1 = get_pres(         densvar1, k, j, i, dks, djs, dis, n);
+    real qv1   = get_qv(           densvar1, k, j, i, dks, djs, dis, n);
+    real ql1   = get_ql(           densvar1, k, j, i, dks, djs, dis, n);
+    real qi1   = get_qi(           densvar1, k, j, i, dks, djs, dis, n);
+    real qd1   = 1.0_fp - qv1 - ql1 - qi1;
+    real temp1 = thermo.compute_T_from_p(pres1, entr1, qd1, qv1, ql1, qi1);
+
+    const auto densvar2 = prog_vars.fields_arr[DENSVAR].data;
+    real entr2 = get_entropic_var( densvar2, k, j, i, dks, djs, dis, n);
+    real pres2 = get_pres(         densvar2, k, j, i, dks, djs, dis, n);
+    real qv2   = get_qv(           densvar2, k, j, i, dks, djs, dis, n);
+    real ql2   = get_ql(           densvar2, k, j, i, dks, djs, dis, n);
+    real qi2   = get_qi(           densvar2, k, j, i, dks, djs, dis, n);
+    real qd2   = 1.0_fp - qv2 - ql2 - qi2;
+    real temp2 = thermo.compute_T_from_p(pres2, entr2, qd2, qv2, ql2, qi2);
+
+    if ( isnan(temp2) || isnan(qv2) || isnan(ql2) || isnan(qi2) || isnan(qd2) || temp2<0 || entr2<0 ) {
+      printf("pamc_debug_chk - id:%d k:%d j:%d i:%d n:%d en_var :: %g  =>  %g \n",id,k,j,i,n,entr1,entr2);
+      printf("pamc_debug_chk - id:%d k:%d j:%d i:%d n:%d temp   :: %g  =>  %g \n",id,k,j,i,n,temp1,temp2);
+      printf("pamc_debug_chk - id:%d k:%d j:%d i:%d n:%d qd     :: %g  =>  %g \n",id,k,j,i,n,qd1,qd2);
+      printf("pamc_debug_chk - id:%d k:%d j:%d i:%d n:%d qv     :: %g  =>  %g \n",id,k,j,i,n,qv1,qv2);
+      printf("pamc_debug_chk - id:%d k:%d j:%d i:%d n:%d ql     :: %g  =>  %g \n",id,k,j,i,n,ql1,ql2);
+      printf("pamc_debug_chk - id:%d k:%d j:%d i:%d n:%d qi     :: %g  =>  %g \n",id,k,j,i,n,qi1,qi2);
+    }
+  });
+}
+
+
+template <class T>
+void VariableSetBase<T>::convert_dynamics_to_coupler_densities(
     PamCoupler &coupler, const FieldSet<nprognostic> &prog_vars,
     const FieldSet<nconstant> &const_vars) {
 
-  if constexpr (T::couple) {
-    const auto &primal_topology = primal_geometry.topology;
     const auto &dual_topology = dual_geometry.topology;
 
     int dis = dual_topology.is;
     int djs = dual_topology.js;
     int dks = dual_topology.ks;
 
-    int pis = primal_topology.is;
-    int pjs = primal_topology.js;
-    int pks = primal_topology.ks;
-
     auto &dm = coupler.get_data_manager_device_readwrite();
 
+  if constexpr (T::couple) {
+
     real4d dm_dens_dry = dm.get<real, 4>("density_dry");
-    real4d dm_uvel = dm.get<real, 4>("uvel");
-    real4d dm_vvel = dm.get<real, 4>("vvel");
-    real4d dm_wvel = dm.get<real, 4>("wvel");
     real4d dm_temp = dm.get<real, 4>("temp");
 
     pam::MultipleFields<ntracers_physics, real4d> dm_tracers;
@@ -380,7 +448,90 @@ void VariableSetBase<T>::convert_dynamics_to_coupler_state(
       dm_tracers.add_field(trac);
     }
 
-    if (couple_wind) {
+    parallel_for(
+        "Dynamics to Coupler State densities",
+        SimpleBounds<4>(dual_topology.nl, dual_topology.n_cells_y,
+                        dual_topology.n_cells_x, dual_topology.nens),
+        YAKL_CLASS_LAMBDA(int k, int j, int i, int n) {
+
+
+          real qd = get_qd(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks,
+                          djs, dis, n);
+          real qv = get_qv(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks,
+                           djs, dis, n);
+
+          real entropic_var = get_entropic_var(
+              prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs, dis, n);
+
+          real ql = 0.0_fp;
+          if (liq_found) {
+            ql = get_ql(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs,
+                        dis, n);
+          }
+
+          real qi = 0.0_fp;
+          if (ice_found) {
+            qi = get_qi(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs,
+                        dis, n);
+          }
+
+
+//THIS LOGIC SHOULD PROBABLY LIVE IN VARIABLE SET, BUT IT IS NOT CLEAR HOW TO CLEANLY INTEGRATE IT YET
+//Something like:
+//set_dry_dens(dm_dry_dens,DENSARR,k,j,i,dks,djs,dis,n)
+//compute_temp(DENSARR,k,j,i,dks,djs,dis,n, entropic_var, qd, qv, ql, qi) or compute_temp(total_dens, entropic_var, qd, qv, ql, qi) and get_total_dens(DENSARR,...)
+
+#if defined(_CE) || defined(_MCErho) || defined(_MCErhod) || defined(_CEp) || defined(_MCErhop) || defined(_MCErhodp)
+          dm_dens_dry(k, j, i, n) =
+              get_dry_density(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks,
+                              djs, dis, n) /
+              dual_geometry.get_area_n1entity(k + dks, j + djs, i + dis, n);
+#endif
+
+#if defined(_CE) || defined(_MCErho) || defined(_MCErhod)
+          real alpha = get_alpha(prog_vars.fields_arr[DENSVAR].data, k, j, i,
+                                 dks, djs, dis, n);
+          real temp = thermo.compute_T_from_alpha(alpha, entropic_var, qd, qv, ql, qi);
+#elif defined(_CEp) || defined(_MCErhop) || defined(_MCErhodp) || defined(PAMC_AN) || defined(PAMC_MAN)
+          real p = get_pres(prog_vars.fields_arr[DENSVAR].data, k, j, i,
+                                 dks, djs, dis, n);
+          real temp = thermo.compute_T_from_p(p, entropic_var, qd, qv, ql, qi);
+#endif
+
+          dm_temp(k, j, i, n) = temp;
+
+          for (int tr = ndensity_nophysics; tr < ndensity_prognostic; tr++) {
+            dm_tracers(tr - ndensity_nophysics, k, j, i, n) =
+                prog_vars.fields_arr[DENSVAR].data(tr, k + dks, j + djs,
+                                                   i + dis, n) /
+                dual_geometry.get_area_n1entity(k + dks, j + djs, i + dis, n);
+          }
+
+        });
+
+}
+}
+
+template <class T>
+void VariableSetBase<T>::convert_dynamics_to_coupler_wind(
+    PamCoupler &coupler, const FieldSet<nprognostic> &prog_vars,
+    const FieldSet<nconstant> &const_vars, bool couple_wind_exact_inverse) {
+
+    const auto &primal_topology = primal_geometry.topology;
+    const auto &dual_topology = dual_geometry.topology;
+
+    int pis = primal_topology.is;
+    int pjs = primal_topology.js;
+    int pks = primal_topology.ks;
+
+    auto &dm = coupler.get_data_manager_device_readwrite();
+
+  if constexpr (T::couple) {
+
+    real4d dm_uvel = dm.get<real, 4>("uvel");
+    real4d dm_vvel = dm.get<real, 4>("vvel");
+    real4d dm_wvel = dm.get<real, 4>("wvel");
+
       parallel_for(
           "Dynamics to Coupler State winds",
           SimpleBounds<4>(dual_topology.nl, dual_topology.n_cells_y,
@@ -429,74 +580,48 @@ void VariableSetBase<T>::convert_dynamics_to_coupler_state(
             dm_vvel(k, j, i, n) = vvel;
             dm_wvel(k, j, i, n) = wvel_mid;
           });
-    }
+}
+}
 
-    parallel_for(
-        "Dynamics to Coupler State densities",
-        SimpleBounds<4>(dual_topology.nl, dual_topology.n_cells_y,
-                        dual_topology.n_cells_x, dual_topology.nens),
-        YAKL_CLASS_LAMBDA(int k, int j, int i, int n) {
-          real qd = get_qd(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks,
-                           djs, dis, n);
-          real qv = get_qv(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks,
-                           djs, dis, n);
-          real alpha = get_alpha(prog_vars.fields_arr[DENSVAR].data, k, j, i,
-                                 dks, djs, dis, n);
-          real entropic_var = get_entropic_var(
-              prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs, dis, n);
 
-          real ql = 0.0_fp;
-          if (liquid_found) {
-            ql = get_ql(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs,
-                        dis, n);
-          }
+template <class T>
+void VariableSetBase<T>::convert_dynamics_to_coupler_staggered_wind(
+    PamCoupler &coupler, const FieldSet<nprognostic> &prog_vars,
+    const FieldSet<nconstant> &const_vars) {
 
-          real qi = 0.0_fp;
-          if (ice_found) {
-            qi = get_qi(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs,
-                        dis, n);
-          }
 
-          real temp = thermo.compute_T(alpha, entropic_var, qd, qv, ql, qi);
+    const auto &primal_topology = primal_geometry.topology;
+
+    int pis = primal_topology.is;
+    int pjs = primal_topology.js;
+    int pks = primal_topology.ks;
+
+    auto &dm = coupler.get_data_manager_device_readwrite();
+  if constexpr (T::couple) {
+
+//ADD THIS
+}
 
-          dm_dens_dry(k, j, i, n) =
-              get_dry_density(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks,
-                              djs, dis, n) /
-              dual_geometry.get_area_n1entity(k + dks, j + djs, i + dis, n);
-          dm_temp(k, j, i, n) = temp;
-          for (int tr = ndensity_nophysics; tr < ndensity_prognostic; tr++) {
-            dm_tracers(tr - ndensity_nophysics, k, j, i, n) =
-                prog_vars.fields_arr[DENSVAR].data(tr, k + dks, j + djs,
-                                                   i + dis, n) /
-                dual_geometry.get_area_n1entity(k + dks, j + djs, i + dis, n);
-          }
-        });
-  }
 }
 
+
+
 template <class T>
-void VariableSetBase<T>::convert_coupler_to_dynamics_state(
+void VariableSetBase<T>::convert_coupler_to_dynamics_densities(
     PamCoupler &coupler, FieldSet<nprognostic> &prog_vars,
     const FieldSet<nconstant> &const_vars) {
 
   if constexpr (T::couple) {
-    const auto &primal_topology = primal_geometry.topology;
+
     const auto &dual_topology = dual_geometry.topology;
 
     int dis = dual_topology.is;
     int djs = dual_topology.js;
     int dks = dual_topology.ks;
 
-    int pis = primal_topology.is;
-    int pjs = primal_topology.js;
-    int pks = primal_topology.ks;
-
     auto &dm = coupler.get_data_manager_device_readonly();
 
     auto dm_dens_dry = dm.get<real const, 4>("density_dry");
-    auto dm_uvel = dm.get<real const, 4>("uvel");
-    auto dm_vvel = dm.get<real const, 4>("vvel");
-    auto dm_wvel = dm.get<real const, 4>("wvel");
     auto dm_temp = dm.get<real const, 4>("temp");
 
     pam::MultipleFields<ntracers_physics, realConst4d> dm_tracers;
@@ -510,37 +635,68 @@ void VariableSetBase<T>::convert_coupler_to_dynamics_state(
         SimpleBounds<4>(dual_topology.nl, dual_topology.n_cells_y,
                         dual_topology.n_cells_x, dual_topology.nens),
         YAKL_CLASS_LAMBDA(int k, int j, int i, int n) {
+
+
           real temp = dm_temp(k, j, i, n);
 
-          real dens_dry = dm_dens_dry(k, j, i, n);
           real dens_vap = dm_tracers(dm_id_vap, k, j, i, n);
           real dens_liq = 0.0_fp;
           real dens_ice = 0.0_fp;
-          if (liquid_found) {
+          if (liq_found) {
             dens_liq = dm_tracers(dm_id_liq, k, j, i, n);
           }
           if (ice_found) {
             dens_ice = dm_tracers(dm_id_ice, k, j, i, n);
           }
-          real dens = dens_dry + dens_ice + dens_liq + dens_vap;
 
-          real qd = dens_dry / dens;
-          real qv = dens_vap / dens;
-          real ql = dens_liq / dens;
-          real qi = dens_ice / dens;
-
-          real alpha = 1.0_fp / dens;
-          real entropic_var =
-              thermo.compute_entropic_var_from_T(alpha, temp, qd, qv, ql, qi);
+//AGAIN, THIS LOGIC SHOULD REALLY LIVE IN VARIABLE SET
+//Something like:
+//get_total_dens(dens,dry_dens)
+//get_qs(dens,dry_dens,dens_s)
+//compute_entropic_var(dens,dry_dens,temp,qs)
 
 #if !defined PAMC_AN && !defined PAMC_MAN
+
+          real dens_dry = dm_dens_dry(k, j, i, n);
+          real dens = dens_dry + dens_vap;
+
           set_density(dens * dual_geometry.get_area_n1entity(k + dks, j + djs,
                                                              i + dis, n),
                       dens_dry * dual_geometry.get_area_n1entity(
                                      k + dks, j + djs, i + dis, n),
                       prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs,
                       dis, n);
+
+          real qd = dens_dry / dens;
+          real qv = dens_vap / dens;
+          real ql = dens_liq / dens;
+          real qi = dens_ice / dens;
+          real alpha = 1.0_fp / dens;
+
+#elif defined(PAMC_AN) || defined(PAMC_MAN)
+          real dens = get_ref_dens(k, dks, n) / dual_geometry.get_area_n1entity(k + dks, j + djs, i + dis, n);
+
+          real qv = dens_vap / dens;
+          real ql = dens_liq / dens;
+          real qi = dens_ice / dens;
+
+          real qd = get_qd(prog_vars.fields_arr[DENSVAR].data, k, j, i, dks, djs, dis, n);
+#endif
+
+
+#if defined(_CE) || defined(_MCErho) || defined(_MCE_rhod)
+          real entropic_var =
+              thermo.compute_entropic_var_from_T_alpha(alpha, temp, qd, qv, ql, qi);
+#elif defined(_CEp)  || defined(_MCErhop) || defined(_MCErhodp)
+          real p = get_pres(alpha, temp, qd, qv, ql, qi);
+          real entropic_var =
+              thermo.compute_entropic_var_from_T_p(p, temp, qd, qv, ql, qi);
+#elif defined(PAMC_AN) || defined(PAMC_MAN)
+          real p = get_pres(k, dks, n);
+          real entropic_var =
+              thermo.compute_entropic_var_from_T_p(p, temp, qd, qv, ql, qi);
 #endif
+
           set_entropic_density(
               entropic_var * dens *
                   dual_geometry.get_area_n1entity(k + dks, j + djs, i + dis, n),
@@ -553,8 +709,29 @@ void VariableSetBase<T>::convert_coupler_to_dynamics_state(
                 dual_geometry.get_area_n1entity(k + dks, j + djs, i + dis, n);
           }
         });
+  }
+}
+
+
+template <class T>
+void VariableSetBase<T>::convert_coupler_to_dynamics_wind(
+    PamCoupler &coupler, FieldSet<nprognostic> &prog_vars,
+    const FieldSet<nconstant> &const_vars, bool couple_wind_exact_inverse) {
+
+  if constexpr (T::couple) {
+
+    const auto &primal_topology = primal_geometry.topology;
+
+    int pis = primal_topology.is;
+    int pjs = primal_topology.js;
+    int pks = primal_topology.ks;
+
+    auto &dm = coupler.get_data_manager_device_readonly();
+
+    auto dm_uvel = dm.get<real const, 4>("uvel");
+    auto dm_vvel = dm.get<real const, 4>("vvel");
+    auto dm_wvel = dm.get<real const, 4>("wvel");
 
-    if (couple_wind) {
       if (couple_wind_exact_inverse) {
         parallel_for(
             "Coupler to Dynamics State Primal U",
@@ -631,10 +808,36 @@ void VariableSetBase<T>::convert_coupler_to_dynamics_state(
                   primal_geometry.get_area_01entity(k + pks, j + pjs, i + pis,
                                                     n);
             });
-      }
-    }
   }
 }
+}
+
+
+template <class T>
+void VariableSetBase<T>::convert_coupler_to_dynamics_staggered_wind(
+    PamCoupler &coupler, FieldSet<nprognostic> &prog_vars,
+    const FieldSet<nconstant> &const_vars) {
+
+  if constexpr (T::couple) {
+    const auto &primal_topology = primal_geometry.topology;
+
+    int pis = primal_topology.is;
+    int pjs = primal_topology.js;
+    int pks = primal_topology.ks;
+
+    auto &dm = coupler.get_data_manager_device_readonly();
+
+    auto dm_uvel = dm.get<real const, 4>("uvel");
+    auto dm_vvel = dm.get<real const, 4>("vvel");
+    auto dm_wvel = dm.get<real const, 4>("wvel");
+
+//ADD THIS
+
+      }
+
+
+  }
+
 
 #ifdef PAMC_SWE
 template <>
@@ -643,7 +846,8 @@ void VariableSetBase<VS_SWE>::initialize(PamCoupler &coupler,
                                          const ThermoPotential &thermodynamics,
                                          const ReferenceState &refstate,
                                          const Geometry<Straight> &primal_geom,
-                                         const Geometry<Twisted> &dual_geom) {
+                                         const Geometry<Twisted> &dual_geom,
+                                         bool verbose) {
   dens_id_mass = 0;
   active_id_mass = 0;
   dens_name[dens_id_mass] = "h";
@@ -652,7 +856,7 @@ void VariableSetBase<VS_SWE>::initialize(PamCoupler &coupler,
   dens_active(dens_id_mass) = true;
 
   VariableSetBase::initialize(*this, coupler, params, thermodynamics, refstate,
-                              primal_geom, dual_geom);
+                              primal_geom, dual_geom, verbose);
 }
 
 template <>
@@ -670,7 +874,8 @@ void VariableSetBase<VS_TSWE>::initialize(PamCoupler &coupler,
                                           const ThermoPotential &thermodynamics,
                                           const ReferenceState &refstate,
                                           const Geometry<Straight> &primal_geom,
-                                          const Geometry<Twisted> &dual_geom) {
+                                          const Geometry<Twisted> &dual_geom,
+                                          bool verbose) {
   dens_id_mass = 0;
   active_id_mass = 0;
   dens_name[dens_id_mass] = "h";
@@ -688,7 +893,7 @@ void VariableSetBase<VS_TSWE>::initialize(PamCoupler &coupler,
   dens_pos(dens_id_entr) = false;
 
   VariableSetBase::initialize(*this, coupler, params, thermodynamics, refstate,
-                              primal_geom, dual_geom);
+                              primal_geom, dual_geom, verbose);
 }
 #endif
 
@@ -699,7 +904,8 @@ void VariableSetBase<VS_CE>::initialize(PamCoupler &coupler,
                                         const ThermoPotential &thermodynamics,
                                         const ReferenceState &refstate,
                                         const Geometry<Straight> &primal_geom,
-                                        const Geometry<Twisted> &dual_geom) {
+                                        const Geometry<Twisted> &dual_geom,
+                                        bool verbose) {
 
   dens_id_mass = 0;
   active_id_mass = 0;
@@ -718,7 +924,7 @@ void VariableSetBase<VS_CE>::initialize(PamCoupler &coupler,
   dens_pos(dens_id_entr) = false;
 
   VariableSetBase::initialize(*this, coupler, params, thermodynamics, refstate,
-                              primal_geom, dual_geom);
+                              primal_geom, dual_geom, verbose);
 }
 
 template <>
@@ -764,7 +970,8 @@ void VariableSetBase<VS_AN>::initialize(PamCoupler &coupler,
                                         const ThermoPotential &thermodynamics,
                                         const ReferenceState &refstate,
                                         const Geometry<Straight> &primal_geom,
-                                        const Geometry<Twisted> &dual_geom) {
+                                        const Geometry<Twisted> &dual_geom,
+                                        bool verbose) {
   dens_id_entr = 0;
   active_id_entr = 0;
   dens_name[dens_id_entr] = "S";
@@ -783,7 +990,7 @@ void VariableSetBase<VS_AN>::initialize(PamCoupler &coupler,
   dens_active(dens_id_mass) = true;
 
   VariableSetBase::initialize(*this, coupler, params, thermodynamics, refstate,
-                              primal_geom, dual_geom);
+                              primal_geom, dual_geom, verbose);
 }
 
 template <>
@@ -792,6 +999,7 @@ real YAKL_INLINE VariableSetBase<VS_AN>::get_total_density(
     int n) const {
   return reference_state.dens.data(dens_id_mass, k + ks, n);
 }
+
 template <>
 real YAKL_INLINE VariableSetBase<VS_AN>::get_total_density(
     const real3d &densvar, int k, int ks, int n) const {
@@ -806,12 +1014,14 @@ real YAKL_INLINE VariableSetBase<VS_AN>::get_entropic_var(const real5d &densvar,
   return densvar(dens_id_entr, k + ks, j + js, i + is, n) /
          reference_state.dens.data(dens_id_mass, k + ks, n);
 }
+
 template <>
 real YAKL_INLINE VariableSetBase<VS_AN>::get_entropic_var(const real3d &densvar,
                                                           int k, int ks,
                                                           int n) const {
   return densvar(dens_id_entr, k + ks, n) / densvar(dens_id_mass, k + ks, n);
 }
+
 template <>
 real YAKL_INLINE VariableSetBase<VS_AN>::get_alpha(const real5d &densvar, int k,
                                                    int j, int i, int ks, int js,
@@ -819,12 +1029,37 @@ real YAKL_INLINE VariableSetBase<VS_AN>::get_alpha(const real5d &densvar, int k,
   return dual_geometry.get_area_n1entity(k + ks, j + js, i + is, n) /
          reference_state.dens.data(dens_id_mass, k + ks, n);
 }
+
 template <>
 real YAKL_INLINE VariableSetBase<VS_AN>::get_alpha(const real3d &densvar, int k,
                                                    int ks, int n) const {
   return dual_geometry.get_area_n1entity(k + ks, 0, 0, n) /
          densvar(dens_id_mass, k + ks, n);
 }
+
+template <>
+real YAKL_INLINE VariableSetBase<VS_AN>::get_pres(const real5d &densvar, int k,
+                                                   int j, int i, int ks, int js,
+                                                   int is, int n) const {
+    const real refrho = 1.0_fp / get_alpha(reference_state.dens.data, k, ks, n);
+    const real refentropic_var = get_entropic_var(reference_state.dens.data, k, ks, n);
+    const real refp =  thermo.solve_p(refrho, refentropic_var, 0, 0, 0, 0);
+    return refp;
+}
+
+template <>
+real YAKL_INLINE VariableSetBase<VS_AN>::get_pres(int k, int ks, int n) const {
+    const real refrho = 1.0_fp / get_alpha(reference_state.dens.data, k, ks, n);
+    const real refentropic_var = get_entropic_var(reference_state.dens.data, k, ks, n);
+    const real refp =  thermo.solve_p(refrho, refentropic_var, 0, 0 , 0, 0);
+    return refp;
+}
+
+template <>
+real YAKL_INLINE VariableSetBase<VS_AN>::get_ref_dens(int k, int ks, int n) const {
+    return reference_state.dens.data(dens_id_mass, k + ks, n);
+}
+
 #endif
 
 // We rely on physics packages ie micro to provide water species- must at least
@@ -836,7 +1071,8 @@ void VariableSetBase<VS_MAN>::initialize(PamCoupler &coupler,
                                          const ThermoPotential &thermodynamics,
                                          const ReferenceState &refstate,
                                          const Geometry<Straight> &primal_geom,
-                                         const Geometry<Twisted> &dual_geom) {
+                                         const Geometry<Twisted> &dual_geom,
+                                         bool verbose) {
   dens_id_entr = 0;
   active_id_entr = 0;
   dens_name[dens_id_entr] = "S";
@@ -855,7 +1091,7 @@ void VariableSetBase<VS_MAN>::initialize(PamCoupler &coupler,
   dens_active(dens_id_mass) = true;
 
   VariableSetBase::initialize(*this, coupler, params, thermodynamics, refstate,
-                              primal_geom, dual_geom);
+                              primal_geom, dual_geom, verbose);
 }
 
 template <>
@@ -903,6 +1139,11 @@ real YAKL_INLINE VariableSetBase<VS_MAN>::get_alpha(const real3d &densvar,
          reference_state.dens.data(dens_id_mass, k + ks, n);
 }
 
+template <>
+real YAKL_INLINE VariableSetBase<VS_MAN>::get_ref_dens(int k, int ks, int n) const {
+    return reference_state.dens.data(dens_id_mass, k + ks, n);
+}
+
 template <>
 real YAKL_INLINE VariableSetBase<VS_MAN>::get_qv(const real5d &densvar, int k,
                                                  int j, int i, int ks, int js,
@@ -933,6 +1174,26 @@ real YAKL_INLINE VariableSetBase<VS_MAN>::get_qi(const real5d &densvar, int k,
          reference_state.dens.data(dens_id_mass, k + ks, n);
 }
 
+template <>
+real YAKL_INLINE VariableSetBase<VS_MAN>::get_pres(const real5d &densvar, int k,
+                                                   int j, int i, int ks, int js,
+                                                   int is, int n) const {
+    const real refrho = 1.0_fp / get_alpha(reference_state.dens.data, k, ks, n);
+    const real refentropic_var = get_entropic_var(reference_state.dens.data, k, ks, n);
+    const real refqv = get_qv(reference_state.dens.data, k, ks, n);
+    const real refp =  thermo.solve_p(refrho, refentropic_var, 1 - refqv, refqv, 0, 0);
+    return refp;
+}
+
+template <>
+real YAKL_INLINE VariableSetBase<VS_MAN>::get_pres(int k, int ks, int n) const {
+    const real refrho = 1.0_fp / get_alpha(reference_state.dens.data, k, ks, n);
+    const real refentropic_var = get_entropic_var(reference_state.dens.data, k, ks, n);
+    const real refqv = get_qv(reference_state.dens.data, k, ks, n);
+    const real refp =  thermo.solve_p(refrho, refentropic_var, 1 - refqv, refqv, 0, 0);
+    return refp;
+}
+
 template <>
 real YAKL_INLINE VariableSetBase<VS_MAN>::_water_dens(const real5d &densvar,
                                                       int k, int j, int i,
@@ -941,12 +1202,8 @@ real YAKL_INLINE VariableSetBase<VS_MAN>::_water_dens(const real5d &densvar,
   real vap_dens = densvar(dens_id_vap, k + ks, j + js, i + is, n);
   real liq_dens = 0.0_fp;
   real ice_dens = 0.0_fp;
-  if (liquid_found) {
-    liq_dens = densvar(dens_id_liq, k + ks, j + js, i + is, n);
-  }
-  if (ice_found) {
-    ice_dens = densvar(dens_id_ice, k + ks, j + js, i + is, n);
-  }
+  // if (liq_found) { liq_dens = densvar(dens_id_liq, k + ks, j + js, i + is, n); }
+  // if (ice_found) { ice_dens = densvar(dens_id_ice, k + ks, j + js, i + is, n); }
   return vap_dens + liq_dens + ice_dens;
 }
 
@@ -957,12 +1214,8 @@ real YAKL_INLINE VariableSetBase<VS_MAN>::_water_dens(const real3d &densvar,
   real vap_dens = densvar(dens_id_vap, k + ks, n);
   real liq_dens = 0.0_fp;
   real ice_dens = 0.0_fp;
-  if (liquid_found) {
-    liq_dens = densvar(dens_id_liq, k + ks, n);
-  }
-  if (ice_found) {
-    ice_dens = densvar(dens_id_ice, k + ks, n);
-  }
+  // if (liq_found) { liq_dens = densvar(dens_id_liq, k + ks, n); }
+  // if (ice_found) { ice_dens = densvar(dens_id_ice, k + ks, n); }
   return vap_dens + liq_dens + ice_dens;
 }
 
@@ -1012,7 +1265,8 @@ template <>
 void VariableSetBase<VS_MCE_rho>::initialize(
     PamCoupler &coupler, ModelParameters &params,
     const ThermoPotential &thermodynamics, const ReferenceState &refstate,
-    const Geometry<Straight> &primal_geom, const Geometry<Twisted> &dual_geom) {
+    const Geometry<Straight> &primal_geom, const Geometry<Twisted> &dual_geom,
+    bool verbose) {
   dens_id_mass = 0;
   active_id_mass = 0;
   dens_name[dens_id_mass] = "rho";
@@ -1030,7 +1284,7 @@ void VariableSetBase<VS_MCE_rho>::initialize(
   dens_pos(dens_id_entr) = false;
 
   VariableSetBase::initialize(*this, coupler, params, thermodynamics, refstate,
-                              primal_geom, dual_geom);
+                              primal_geom, dual_geom, verbose);
 }
 
 template <>
@@ -1118,7 +1372,7 @@ real YAKL_INLINE VariableSetBase<VS_MCE_rho>::_water_dens(const real5d &densvar,
   real vap_dens = densvar(dens_id_vap, k + ks, j + js, i + is, n);
   real liq_dens = 0.0_fp;
   real ice_dens = 0.0_fp;
-  if (liquid_found) {
+  if (liq_found) {
     liq_dens = densvar(dens_id_liq, k + ks, j + js, i + is, n);
   }
   if (ice_found) {
@@ -1134,7 +1388,7 @@ real YAKL_INLINE VariableSetBase<VS_MCE_rho>::_water_dens(const real3d &densvar,
   real vap_dens = densvar(dens_id_vap, k + ks, n);
   real liq_dens = 0.0_fp;
   real ice_dens = 0.0_fp;
-  if (liquid_found) {
+  if (liq_found) {
     liq_dens = densvar(dens_id_liq, k + ks, n);
   }
   if (ice_found) {
@@ -1187,7 +1441,8 @@ template <>
 void VariableSetBase<VS_MCE_rhod>::initialize(
     PamCoupler &coupler, ModelParameters &params,
     const ThermoPotential &thermodynamics, const ReferenceState &refstate,
-    const Geometry<Straight> &primal_geom, const Geometry<Twisted> &dual_geom) {
+    const Geometry<Straight> &primal_geom, const Geometry<Twisted> &dual_geom,
+    bool verbose) {
   dens_id_mass = 0;
   active_id_mass = 0;
   dens_name[dens_id_mass] = "rho_d";
@@ -1205,7 +1460,7 @@ void VariableSetBase<VS_MCE_rhod>::initialize(
   dens_pos(dens_id_entr) = false;
 
   VariableSetBase::initialize(*this, coupler, params, thermodynamics, refstate,
-                              primal_geom, dual_geom);
+                              primal_geom, dual_geom, verbose);
 }
 
 template <>
@@ -1216,7 +1471,7 @@ real YAKL_INLINE VariableSetBase<VS_MCE_rhod>::get_total_density(
   real vap_dens = densvar(dens_id_vap, k + ks, j + js, i + is, n);
   real liq_dens = 0.0_fp;
   real ice_dens = 0.0_fp;
-  if (liquid_found) {
+  if (liq_found) {
     liq_dens = densvar(dens_id_liq, k + ks, j + js, i + is, n);
   }
   if (ice_found) {
@@ -1315,4 +1570,5 @@ using VariableSet = VariableSetBase<VS_MCE_rhop>;
 #elif PAMC_MCErhodp
 using VariableSet = VariableSetBase<VS_MCE_rhodp>;
 #endif
+
 } // namespace pamc
diff --git a/dynamics/spam/src/models/extrudedmodel.h b/dynamics/spam/src/models/extrudedmodel.h
index 6408d922..93f51ac2 100644
--- a/dynamics/spam/src/models/extrudedmodel.h
+++ b/dynamics/spam/src/models/extrudedmodel.h
@@ -377,24 +377,59 @@ class ModelTendencies : public ExtrudedTendencies {
 
   void
   convert_dynamics_to_coupler_state(PamCoupler &coupler,
+                                    const FieldSet<nprognostic> &prog_vars,
+                                    const FieldSet<nconstant> &const_vars, bool couple_wind, bool couple_wind_exact_inverse) {
+    equations->varset.convert_dynamics_to_coupler_densities(coupler, prog_vars,
+                                                        const_vars);
+
+    if (couple_wind){ equations->varset.convert_dynamics_to_coupler_wind(coupler, prog_vars,
+                                                        const_vars, couple_wind_exact_inverse);}
+
+  }
+
+  void
+  convert_dynamics_to_coupler_state_staggered(PamCoupler &coupler,
                                     const FieldSet<nprognostic> &prog_vars,
                                     const FieldSet<nconstant> &const_vars) {
-    equations->varset.convert_dynamics_to_coupler_state(coupler, prog_vars,
+    equations->varset.convert_dynamics_to_coupler_densities(coupler, prog_vars,
+                                                        const_vars);
+    equations->varset.convert_dynamics_to_coupler_staggered_wind(coupler, prog_vars,
                                                         const_vars);
   }
+
   void convert_coupler_to_dynamics_state(PamCoupler &coupler,
                                          FieldSet<nprognostic> &prog_vars,
                                          FieldSet<nauxiliary> &auxiliary_vars,
-                                         FieldSet<nconstant> &const_vars) {
-    equations->varset.convert_coupler_to_dynamics_state(coupler, prog_vars,
+                                         FieldSet<nconstant> &const_vars, bool couple_wind, bool couple_wind_exact_inverse) {
+    equations->varset.convert_coupler_to_dynamics_densities(coupler, prog_vars,
                                                         const_vars);
+    if (couple_wind) {equations->varset.convert_coupler_to_dynamics_wind(coupler, prog_vars,
+                                                        const_vars, couple_wind_exact_inverse);}
+    prog_vars.exchange();
+    const_vars.exchange();
+    auxiliary_vars.exchange();
+
 #if defined PAMC_AN || defined PAMC_MAN
-    if (equations->varset.couple_wind) {
+    if (couple_wind) {
       project_to_anelastic(const_vars, prog_vars, auxiliary_vars);
     }
 #endif
   }
 
+  void
+  convert_coupler_to_dynamics_state_staggered(PamCoupler &coupler,
+                                    FieldSet<nprognostic> &prog_vars,
+                                    FieldSet<nauxiliary> &auxiliary_vars,
+                                    FieldSet<nconstant> &const_vars) {
+    equations->varset.convert_coupler_to_dynamics_densities(coupler, prog_vars,
+                                                        const_vars);
+    equations->varset.convert_coupler_to_dynamics_staggered_wind(coupler, prog_vars,
+                                                        const_vars);
+#if defined(PAMC_AN) || defined(PAMC_MAN)
+      project_to_anelastic(const_vars, prog_vars, auxiliary_vars);
+#endif
+  }
+
   void compute_constants(FieldSet<nconstant> &const_vars,
                          FieldSet<nprognostic> &x) override {}
 
@@ -426,9 +461,9 @@ class ModelTendencies : public ExtrudedTendencies {
 #if defined PAMC_AN || defined PAMC_MAN
           for (int d = 0; d < VS::ndensity_prognostic; ++d) {
             dens0var(d, k + pks, j + pjs, i + pis, n) =
-                (densvar(d, k + pks, j + pjs, i + pis, n) -
-                 refdens(d, k + pks, n)) /
-                varset.get_total_density(densvar, k, j, i, pks, pjs, pis, n);
+               (densvar(d, k + pks, j + pjs, i + pis, n) -
+                refdens(d, k + pks, n)) /
+               varset.get_total_density(densvar, k, j, i, pks, pjs, pis, n);
           }
 #else
           compute_Hn1bar<VS::ndensity_prognostic, diff_ord, vert_diff_ord>(
@@ -1097,10 +1132,10 @@ class ModelTendencies : public ExtrudedTendencies {
 #if defined PAMC_AN || defined PAMC_MAN
           // add reference state
           for (int d = 0; d < ndims; d++) {
-            for (int l = 0; l < VS::ndensity_prognostic; l++) {
-              densreconvar(d + l * ndims, k + dks, j + djs, i + dis, n) +=
-                  refstate.q_pi.data(l, k + pks, n);
-            }
+           for (int l = 0; l < VS::ndensity_prognostic; l++) {
+             densreconvar(d + l * ndims, k + dks, j + djs, i + dis, n) +=
+                 refstate.q_pi.data(l, k + pks, n);
+           }
           }
           for (int d = 0; d < ndims; d++) {
             densreconvar(d + varset.dens_id_mass * ndims, k + dks, j + djs,
@@ -1154,8 +1189,8 @@ class ModelTendencies : public ExtrudedTendencies {
 #if defined PAMC_AN || defined PAMC_MAN
           // add reference state
           for (int l = 0; l < VS::ndensity_prognostic; l++) {
-            densvertreconvar(l, k + dks + 1, j + djs, i + dis, n) +=
-                refstate.q_di.data(l, k + dks + 1, n);
+           densvertreconvar(l, k + dks + 1, j + djs, i + dis, n) +=
+               refstate.q_di.data(l, k + dks + 1, n);
           }
           densvertreconvar(varset.dens_id_mass, k + dks + 1, j + djs, i + dis,
                            n) = 1;
@@ -2625,7 +2660,6 @@ class ModelTendencies : public ExtrudedTendencies {
           if (ik == 0 && jk == 0) {
             p_transform(kfix, j, i, n) = 0;
           }
-
           int nz = primal_topology.ni;
           tri_c(0, j, i, n) = tri_u(0, j, i, n) / tri_d(0, j, i, n);
           for (int k = 1; k < nz - 1; ++k) {
@@ -3758,41 +3792,42 @@ void testcase_from_string(std::unique_ptr<TestCase> &testcase, std::string name,
 void read_model_params_file(std::string inFile, ModelParameters &params,
                             Parallel &par, PamCoupler &coupler,
                             std::unique_ptr<TestCase> &testcase) {
+  #ifdef PAM_STANDALONE
+    // Read common parameters
+    int nz = coupler.get_nz();
+    readParamsFile(inFile, params, par, nz);
+
+    // Read config file
+    YAML::Node config = YAML::LoadFile(inFile);
+
+    params.acoustic_balance = config["balance_initial_density"].as<bool>(false);
+    params.uniform_vertical = (config["vcoords"].as<std::string>() == "uniform");
+    // Read diffusion coefficients
+    params.entropicvar_diffusion_coeff =
+        config["entropicvar_diffusion_coeff"].as<real>(0);
+    params.velocity_diffusion_coeff =
+        config["velocity_diffusion_coeff"].as<real>(0);
+    // Read the data initialization options
+    params.initdataStr = config["initData"].as<std::string>();
+    params.force_refstate_hydrostatic_balance =
+        config["force_refstate_hydrostatic_balance"].as<bool>(false);
 
-  // Read common parameters
-  int nz = coupler.get_nz();
-  readParamsFile(inFile, params, par, nz);
-
-  // Read config file
-  YAML::Node config = YAML::LoadFile(inFile);
-
-  params.acoustic_balance = config["balance_initial_density"].as<bool>(false);
-  params.uniform_vertical = (config["vcoords"].as<std::string>() == "uniform");
-  // Read diffusion coefficients
-  params.entropicvar_diffusion_coeff =
-      config["entropicvar_diffusion_coeff"].as<real>(0);
-  params.velocity_diffusion_coeff =
-      config["velocity_diffusion_coeff"].as<real>(0);
-  // Read the data initialization options
-  params.initdataStr = config["initData"].as<std::string>();
-  params.force_refstate_hydrostatic_balance =
-      config["force_refstate_hydrostatic_balance"].as<bool>(false);
-
-  for (int i = 0; i < ntracers_dycore; i++) {
-    params.tracerdataStr[i] =
-        config["initTracer" + std::to_string(i)].as<std::string>("constant");
-    params.dycore_tracerpos[i] =
-        config["initTracerPos" + std::to_string(i)].as<bool>(false);
-  }
+    for (int i = 0; i < ntracers_dycore; i++) {
+      params.tracerdataStr[i] =
+          config["initTracer" + std::to_string(i)].as<std::string>("constant");
+      params.dycore_tracerpos[i] =
+          config["initTracerPos" + std::to_string(i)].as<bool>(false);
+    }
 
-  // Store vertical cell interface heights in the data manager
-  auto &dm = coupler.get_data_manager_device_readonly();
-  params.zint = dm.get<real const, 2>("vertical_interface_height");
+    // Store vertical cell interface heights in the data manager
+    auto &dm = coupler.get_data_manager_device_readonly();
+    params.zint = dm.get<real const, 2>("vertical_interface_height");
 
-  params.ylen = 1.0;
-  params.yc = 0.5;
+    params.ylen = 1.0;
+    params.yc = 0.5;
 
-  testcase_from_string(testcase, params.initdataStr, params.acoustic_balance);
+    testcase_from_string(testcase, params.initdataStr, params.acoustic_balance);
+  #endif
 }
 
 void read_model_params_coupler(ModelParameters &params, Parallel &par,
@@ -3820,25 +3855,28 @@ void read_model_params_coupler(ModelParameters &params, Parallel &par,
 }
 
 void check_and_print_model_parameters(const ModelParameters &params,
-                                      const Parallel &par) {
-
-  check_and_print_parameters(params, par);
-
-  serial_print("IC: " + params.initdataStr, par.masterproc);
-  serial_print("acoustically balanced: " +
-                   std::to_string(params.acoustic_balance),
-               par.masterproc);
-  serial_print("entropicvar_diffusion_coeff: " +
-                   std::to_string(params.entropicvar_diffusion_coeff),
-               par.masterproc);
-  serial_print("velocity_diffusion_coeff: " +
-                   std::to_string(params.velocity_diffusion_coeff),
-               par.masterproc);
-
-  for (int i = 0; i < ntracers_dycore; i++) {
-    serial_print("Dycore Tracer" + std::to_string(i) +
-                     " IC: " + params.tracerdataStr[i],
+                                      const Parallel &par,
+                                      bool verbose=false) {
+
+  check_and_print_parameters(params, par, verbose);
+
+  if (verbose) {
+    serial_print("IC: " + params.initdataStr, par.masterproc);
+    serial_print("acoustically balanced: " +
+                     std::to_string(params.acoustic_balance),
+                 par.masterproc);
+    serial_print("entropicvar_diffusion_coeff: " +
+                     std::to_string(params.entropicvar_diffusion_coeff),
                  par.masterproc);
+    serial_print("velocity_diffusion_coeff: " +
+                     std::to_string(params.velocity_diffusion_coeff),
+                 par.masterproc);
+
+    for (int i = 0; i < ntracers_dycore; i++) {
+      serial_print("Dycore Tracer" + std::to_string(i) +
+                       " IC: " + params.tracerdataStr[i],
+                   par.masterproc);
+    }
   }
 }
 
@@ -4395,6 +4433,7 @@ class CoupledTestCase : public TestCase {
 
 public:
   PamCoupler &coupler;
+  bool set_coupler_state = false;
 
   CoupledTestCase(PamCoupler &coupler) : coupler(coupler) {}
 
@@ -4417,11 +4456,8 @@ class CoupledTestCase : public TestCase {
         YAKL_LAMBDA(real x, real y, real z) { return flat_geop(z, g); },
         constvars.fields_arr[HSVAR], 0);
 
-    // hack to set winds
-    bool org_couple_wind = varset.couple_wind;
-    varset.couple_wind = true;
-    varset.convert_coupler_to_dynamics_state(coupler, progvars, constvars);
-    varset.couple_wind = org_couple_wind;
+    varset.convert_coupler_to_dynamics_densities(coupler, progvars, constvars);
+    varset.convert_coupler_to_dynamics_wind(coupler, progvars, constvars, false);
   }
 
   void set_reference_state(const Geometry<Straight> &primal_geom,
@@ -4461,12 +4497,11 @@ class CoupledTestCase : public TestCase {
 
     auto &dm = coupler.get_data_manager_device_readonly();
 
-    auto dm_gcm_dens_dry = dm.get<real const, 2>("gcm_density_dry");
-    auto dm_gcm_dens_vap = dm.get<real const, 2>("gcm_water_vapor");
-    auto dm_gcm_uvel = dm.get<real const, 2>("gcm_uvel");
-    auto dm_gcm_vvel = dm.get<real const, 2>("gcm_vvel");
-    auto dm_gcm_wvel = dm.get<real const, 2>("gcm_wvel");
-    auto dm_gcm_temp = dm.get<real const, 2>("gcm_temp");
+    auto dm_ref_dens_dry = dm.get<real const, 2>("ref_density_dry");
+    auto dm_ref_dens_vap = dm.get<real const, 2>("ref_density_vapor");
+    auto dm_ref_dens_liq = dm.get<real const, 2>("ref_density_liq");
+    auto dm_ref_dens_ice = dm.get<real const, 2>("ref_density_ice");
+    auto dm_ref_temp     = dm.get<real const, 2>("ref_temp");
 
     const real grav = coupler.get_option<real>("grav");
     dual_geom.set_profile_n1form_values(
@@ -4477,12 +4512,12 @@ class CoupledTestCase : public TestCase {
         "Coupled reference state 1",
         SimpleBounds<2>(dual_topology.nl, dual_topology.nens),
         YAKL_CLASS_LAMBDA(int k, int n) {
-          const real temp = dm_gcm_temp(k, n);
-          const real dens_dry = dm_gcm_dens_dry(k, n);
-          const real dens_vap = dm_gcm_dens_vap(k, n);
-          const real dens_liq = 0.0_fp;
-          const real dens_ice = 0.0_fp;
-          const real dens = dens_dry + dens_ice + dens_liq + dens_vap;
+          const real temp     = dm_ref_temp(k, n);
+          const real dens_dry = dm_ref_dens_dry(k,n);
+          const real dens_vap = dm_ref_dens_vap(k,n);
+          const real dens_liq = dm_ref_dens_liq(k,n);
+          const real dens_ice = dm_ref_dens_ice(k,n);
+          const real dens = dens_dry + dens_vap;
 
           const real qd = dens_dry / dens;
           const real qv = dens_vap / dens;
@@ -4491,7 +4526,7 @@ class CoupledTestCase : public TestCase {
 
           const real alpha = 1.0_fp / dens;
           const real entropic_var =
-              thermo.compute_entropic_var_from_T(alpha, temp, qd, qv, ql, qi);
+              thermo.compute_entropic_var_from_T_alpha(alpha, temp, qd, qv, ql, qi);
 
           const real dual_volume =
               dual_geom.get_area_n1entity(k + dks, djs, dis, n);
@@ -4503,9 +4538,8 @@ class CoupledTestCase : public TestCase {
               dens_vap * dual_volume;
 
           refstate.q_pi.data(varset.dens_id_mass, k + dks, n) = dens;
-          refstate.q_pi.data(varset.dens_id_entr, k + dks, n) =
-              dens * entropic_var;
-          refstate.q_pi.data(varset.dens_id_vap, k + dks, n) = dens_vap;
+          refstate.q_pi.data(varset.dens_id_entr, k + dks, n) = dens * entropic_var;
+          refstate.q_pi.data(varset.dens_id_vap,  k + dks, n) = dens_vap;
         });
 
     parallel_for(
@@ -4611,33 +4645,33 @@ class CoupledTestCase : public TestCase {
           real dz;
 
           if (k == 0) {
-            T_km = dm_gcm_temp(k, n);
-            T_kp = dm_gcm_temp(k + 1, n);
+            T_km = dm_ref_temp(k, n);
+            T_kp = dm_ref_temp(k + 1, n);
             T = T_km;
 
-            rv_kp = dm_gcm_dens_vap(k + 1, n) / dm_gcm_dens_dry(k + 1, n);
-            rv_km = dm_gcm_dens_vap(k, n) / dm_gcm_dens_dry(k, n);
+            rv_kp = dm_ref_dens_vap(k + 1, n) / dm_ref_dens_dry(k + 1, n);
+            rv_km = dm_ref_dens_vap(k, n) / dm_ref_dens_dry(k, n);
             rv = rv_km;
 
             dz = primal_geom.dz(k + pks, n);
           } else if (k == primal_topology.ni - 1) {
-            T_km = dm_gcm_temp(k - 1, n);
-            T_kp = dm_gcm_temp(k, n);
-            T = dm_gcm_temp(k, n);
+            T_km = dm_ref_temp(k - 1, n);
+            T_kp = dm_ref_temp(k, n);
+            T = dm_ref_temp(k, n);
 
-            rv_kp = dm_gcm_dens_vap(k, n) / dm_gcm_dens_dry(k, n);
-            rv_km = dm_gcm_dens_vap(k - 1, n) / dm_gcm_dens_dry(k - 1, n);
+            rv_kp = dm_ref_dens_vap(k, n) / dm_ref_dens_dry(k, n);
+            rv_km = dm_ref_dens_vap(k - 1, n) / dm_ref_dens_dry(k - 1, n);
             rv = rv_kp;
 
             dz = primal_geom.dz(k - 1 + pks, n);
           } else {
-            T = dm_gcm_temp(k, n);
-            T_km = dm_gcm_temp(k - 1, n);
-            T_kp = dm_gcm_temp(k + 1, n);
+            T = dm_ref_temp(k, n);
+            T_km = dm_ref_temp(k - 1, n);
+            T_kp = dm_ref_temp(k + 1, n);
 
-            rv_km = dm_gcm_dens_vap(k - 1, n) / dm_gcm_dens_dry(k - 1, n);
-            rv_kp = dm_gcm_dens_vap(k + 1, n) / dm_gcm_dens_dry(k + 1, n);
-            rv = dm_gcm_dens_vap(k, n) / dm_gcm_dens_dry(k, n);
+            rv_km = dm_ref_dens_vap(k - 1, n) / dm_ref_dens_dry(k - 1, n);
+            rv_kp = dm_ref_dens_vap(k + 1, n) / dm_ref_dens_dry(k + 1, n);
+            rv = dm_ref_dens_vap(k, n) / dm_ref_dens_dry(k, n);
 
             dz = primal_geom.dz(k + pks, n) + primal_geom.dz(k - 1 + pks, n);
           }
@@ -4830,7 +4864,7 @@ template <bool acoustic_balance> struct RisingBubble {
     real T_ref = refT_f(z, thermo);
     real p_ref = refp_f(z, thermo);
 
-    return rho_ref * thermo.compute_entropic_var(p_ref, T_ref, 1, 0, 0, 0);
+    return rho_ref * thermo.compute_entropic_var_from_T_p(p_ref, T_ref, 1, 0, 0, 0);
   }
 
   static real YAKL_INLINE rho_f(real x, real y, real z,
@@ -4857,7 +4891,7 @@ template <bool acoustic_balance> struct RisingBubble {
     }
     real dtheta = (r < rc) ? dss * 0.5_fp * (1._fp + cos(pi * r / rc)) : 0._fp;
     real dT = dtheta * pow(p / thermo.cst.pr, thermo.cst.kappa_d);
-    return thermo.compute_entropic_var(p, T + dT, 1, 0, 0, 0);
+    return thermo.compute_entropic_var_from_T_p(p, T + dT, 1, 0, 0, 0);
   }
 
   static VecXYZ YAKL_INLINE v_f(real x, real y, real z) {
@@ -4920,7 +4954,7 @@ struct TwoBubbles {
     real rho_ref = refrho_f(z, thermo);
     real T_ref = refT_f(z, thermo);
     real p_ref = refp_f(z, thermo);
-    return rho_ref * thermo.compute_entropic_var(p_ref, T_ref, 1, 0, 0, 0);
+    return rho_ref * thermo.compute_entropic_var_from_T_p(p_ref, T_ref, 1, 0, 0, 0);
   }
 
   static real YAKL_INLINE rho_f(real x, real y, real z,
@@ -4950,7 +4984,7 @@ struct TwoBubbles {
     }
 
     real dT = dtheta * pow(p / thermo.cst.pr, thermo.cst.kappa_d);
-    return thermo.compute_entropic_var(p, T + dT, 1, 0, 0, 0);
+    return thermo.compute_entropic_var_from_T_p(p, T + dT, 1, 0, 0, 0);
   }
 
   static VecXYZ YAKL_INLINE v_f(real x, real y, real z) {
@@ -5004,7 +5038,7 @@ struct DensityCurrent {
     real rho_ref = refrho_f(z, thermo);
     real T_ref = refT_f(z, thermo);
     real p_ref = refp_f(z, thermo);
-    return rho_ref * thermo.compute_entropic_var(p_ref, T_ref, 1, 0, 0, 0);
+    return rho_ref * thermo.compute_entropic_var_from_T_p(p_ref, T_ref, 1, 0, 0, 0);
   }
 
   static real YAKL_INLINE rho_f(real x, real y, real z,
@@ -5021,7 +5055,7 @@ struct DensityCurrent {
                   (z - bzc) * (z - bzc) / (bzr * bzr));
     real dtheta = (r < 1) ? dss * 0.5_fp * (1._fp + cos(pi * r)) : 0._fp;
     real dT = dtheta * pow(p / thermo.cst.pr, thermo.cst.kappa_d);
-    return thermo.compute_entropic_var(p, T + dT, 1, 0, 0, 0);
+    return thermo.compute_entropic_var_from_T_p(p, T + dT, 1, 0, 0, 0);
   }
 
   static VecXYZ YAKL_INLINE v_f(real x, real y, real z) {
@@ -5111,7 +5145,7 @@ struct LargeRisingBubble {
     real rho_ref = refrho_f(z, thermo);
     real T_ref = refT_f(z, thermo);
     real p_ref = refp_f(z, thermo);
-    return rho_ref * thermo.compute_entropic_var(p_ref, T_ref, 1, 0, 0, 0);
+    return rho_ref * thermo.compute_entropic_var_from_T_p(p_ref, T_ref, 1, 0, 0, 0);
   }
 
   static real YAKL_INLINE rho_f(real x, real y, real z,
@@ -5125,7 +5159,7 @@ struct LargeRisingBubble {
     real T0 = isentropic_T(z, theta0, g, thermo);
     real dtheta = linear_ellipsoid(x, z, xc, bzc, xrad, zrad, amp_theta);
     real dT = dtheta * pow(p / thermo.cst.pr, thermo.cst.kappa_d);
-    return thermo.compute_entropic_var(p, T0 + dT, 1, 0, 0, 0);
+    return thermo.compute_entropic_var_from_T_p(p, T0 + dT, 1, 0, 0, 0);
   }
 
   static VecXYZ YAKL_INLINE v_f(real x, real y, real z) {
@@ -5177,7 +5211,7 @@ struct MoistLargeRisingBubble : LargeRisingBubble {
     real T0 = isentropic_T(z, theta0, g, thermo);
     real dtheta = linear_ellipsoid(x, z, xc, bzc, xrad, zrad, amp_theta);
     real dT = dtheta * pow(p / thermo.cst.pr, thermo.cst.kappa_d);
-    return thermo.compute_entropic_var(p, T0 + dT, 1, 0, 0, 0);
+    return thermo.compute_entropic_var_from_T_p(p, T0 + dT, 1, 0, 0, 0);
   }
 };
 
@@ -5220,7 +5254,7 @@ template <bool add_perturbation> struct GravityWave {
                                                const ThermoPotential &thermo) {
     real rho_ref = refrho_f(z, thermo);
     real p_ref = refp_f(z, thermo);
-    return rho_ref * thermo.compute_entropic_var(p_ref, T_ref, 1, 0, 0, 0);
+    return rho_ref * thermo.compute_entropic_var_from_T_p(p_ref, T_ref, 1, 0, 0, 0);
   }
 
   static real YAKL_INLINE rho_f(real x, real y, real z,
@@ -5280,7 +5314,7 @@ template <bool add_perturbation> struct GravityWave {
       p += dp;
     }
 
-    return thermo.compute_entropic_var(p, T, 1, 0, 0, 0);
+    return thermo.compute_entropic_var_from_T_p(p, T, 1, 0, 0, 0);
   }
 
   static real YAKL_INLINE entropicdensity_f(real x, real y, real z,
@@ -5322,7 +5356,7 @@ template <bool add_perturbation> struct GravityWave {
       T += sol.dT;
     }
 
-    return rho * thermo.compute_entropic_var(p, T, 1, 0, 0, 0);
+    return rho * thermo.compute_entropic_var_from_T_p(p, T, 1, 0, 0, 0);
   }
 
   static real YAKL_INLINE Texact_f(real x, real y, real z, real t,
@@ -5602,7 +5636,7 @@ template <bool add_perturbation> struct GravityWave {
 
             real T;
             if (!linear_T) {
-              T = thermo.compute_T(alpha, entropic_var, 1, 0, 0, 0);
+              T = thermo.compute_T_from_alpha(alpha, entropic_var, 1, 0, 0, 0);
             } else {
               real Rd = thermo.cst.Rd;
               real refalpha = varset.get_alpha(refdens, k, pks, n);
@@ -5664,6 +5698,7 @@ void testcase_from_string(std::unique_ptr<TestCase> &testcase, std::string name,
   }
 }
 
+#ifdef PAM_STANDALONE
 void testcase_from_config(std::unique_ptr<TestCase> &testcase,
                           const YAML::Node &config) {
   const std::string name = config["initData"].as<std::string>();
@@ -5671,6 +5706,7 @@ void testcase_from_config(std::unique_ptr<TestCase> &testcase,
       config["balance_initial_density"].as<bool>(false);
   testcase_from_string(testcase, name, acoustic_balance);
 }
+#endif
 
 std::unique_ptr<TestCase> make_coupled_test_case(PamCoupler &coupler) {
   return std::make_unique<CoupledTestCase>(coupler);
diff --git a/dynamics/spam/src/models/layermodel.h b/dynamics/spam/src/models/layermodel.h
index e394788b..8d55f112 100644
--- a/dynamics/spam/src/models/layermodel.h
+++ b/dynamics/spam/src/models/layermodel.h
@@ -104,16 +104,24 @@ class ModelTendencies : public Tendencies {
   void
   convert_dynamics_to_coupler_state(PamCoupler &coupler,
                                     const FieldSet<nprognostic> &prog_vars,
-                                    const FieldSet<nconstant> &const_vars) {
-    this->equations->varset.convert_dynamics_to_coupler_state(
-        coupler, prog_vars, const_vars);
+                                    const FieldSet<nconstant> &const_vars, bool couple_wind, bool couple_wind_exact_inverse) {
+    equations->varset.convert_dynamics_to_coupler_densities(coupler, prog_vars,
+                                                        const_vars);
+
+    if (couple_wind){ equations->varset.convert_dynamics_to_coupler_wind(coupler, prog_vars,
+                                                        const_vars, couple_wind_exact_inverse);}
+
   }
+
   void convert_coupler_to_dynamics_state(PamCoupler &coupler,
                                          FieldSet<nprognostic> &prog_vars,
                                          FieldSet<nauxiliary> &auxiliary_vars,
-                                         FieldSet<nconstant> &const_vars) {
-    this->equations->varset.convert_coupler_to_dynamics_state(
-        coupler, prog_vars, const_vars);
+                                         FieldSet<nconstant> &const_vars, bool couple_wind, bool couple_wind_exact_inverse) {
+    equations->varset.convert_coupler_to_dynamics_densities(coupler, prog_vars,
+                                                        const_vars);
+    if (couple_wind) {equations->varset.convert_coupler_to_dynamics_wind(coupler, prog_vars,
+                                                        const_vars, couple_wind_exact_inverse);}
+
   }
 
   void compute_constants(FieldSet<nconstant> &const_vars,
diff --git a/dynamics/spam/src/models/model.h b/dynamics/spam/src/models/model.h
index 4afccce8..28e26836 100644
--- a/dynamics/spam/src/models/model.h
+++ b/dynamics/spam/src/models/model.h
@@ -31,12 +31,13 @@ class Equations {
   Equations() { this->is_initialized = false; }
   void initialize(PamCoupler &coupler, ModelParameters &params,
                   const Geometry<Straight> &primal_geom,
-                  const Geometry<Twisted> &dual_geom) {
+                  const Geometry<Twisted> &dual_geom,
+                  bool verbose=false) {
 
     this->reference_state.initialize<VariableSet>(primal_geom.topology,
                                                   dual_geom.topology);
     this->varset.initialize(coupler, params, thermo, reference_state,
-                            primal_geom, dual_geom);
+                            primal_geom, dual_geom, verbose);
     this->PVPE.initialize(varset);
     this->Hk.initialize(varset, primal_geom, dual_geom);
     this->Hs.initialize(thermo, varset, primal_geom, dual_geom);
@@ -115,6 +116,7 @@ class TestCase {
   SArray<TRACER_TAG, 1, ntracers_dycore + GPU_PAD> tracers;
   Equations *equations;
   bool is_initialized;
+  bool set_coupler_state = true;
 
   TestCase() { this->is_initialized = false; }
 
diff --git a/dynamics/spam/src/models/stats.h b/dynamics/spam/src/models/stats.h
index 521eec4a..fd5b4bc1 100644
--- a/dynamics/spam/src/models/stats.h
+++ b/dynamics/spam/src/models/stats.h
@@ -47,7 +47,7 @@ class Stats {
     this->equations = &eqs;
     this->nens = params.nens;
     this->masterproc = par.masterproc;
-    this->statsize = params.Nsteps / params.Nstat + 1;
+    this->statsize = params.statSize + 1;
   }
 
   virtual void compute(FieldSet<nprognostic> &progvars,
diff --git a/externals/eigen b/externals/eigen
deleted file mode 160000
index 31f796eb..00000000
--- a/externals/eigen
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 31f796ebef35eeadd0e26878aab3fe99ca412a45
diff --git a/pam_core/modules/gcm_forcing.h b/pam_core/modules/gcm_forcing.h
index a5c6a467..254b78a2 100644
--- a/pam_core/modules/gcm_forcing.h
+++ b/pam_core/modules/gcm_forcing.h
@@ -23,6 +23,10 @@ namespace modules {
     auto &dm = coupler.get_data_manager_device_readwrite();
 
     auto dt_gcm = coupler.get_option<real>("gcm_physics_dt");
+    real cp_d   = coupler.get_option<real>("cp_d");
+    real grav   = coupler.get_option<real>("grav");
+    real Lv     = coupler.get_option<real>("latvap") ;
+    real Lf     = coupler.get_option<real>("latice") ;
 
     // Get current state from coupler
     auto rho_d = dm.get<real const,4>( "density_dry" );
@@ -30,12 +34,24 @@ namespace modules {
     auto vvel  = dm.get<real const,4>( "vvel"        );
     auto temp  = dm.get<real const,4>( "temp"        );
     auto rho_v = dm.get<real const,4>( "water_vapor" );
+    auto rho_l = dm.get<real const,4>( "cloud_water" );
+    auto rho_i = dm.get<real const,4>( "ice"         );
 
     auto rho_d_gcm = dm.get<real const,2> ( "gcm_density_dry" );
     auto uvel_gcm  = dm.get<real const,2> ( "gcm_uvel"        );
     auto vvel_gcm  = dm.get<real const,2> ( "gcm_vvel"        );
     auto temp_gcm  = dm.get<real const,2> ( "gcm_temp"        );
     auto rho_v_gcm = dm.get<real const,2> ( "gcm_water_vapor" );
+    auto rho_l_gcm = dm.get<real const,2> ( "gcm_cloud_water" );
+    auto rho_i_gcm = dm.get<real const,2> ( "gcm_cloud_ice"   );
+
+    auto crm_nc = dm.get<real const,4>("cloud_water_num");
+    auto crm_ni = dm.get<real const,4>("ice_num");
+    auto crm_nr = dm.get<real const,4>("rain_num");
+
+    auto gcm_nc = dm.get<real,2>("gcm_num_liq" );
+    auto gcm_ni = dm.get<real,2>("gcm_num_ice" );
+    auto gcm_nr = dm.get<real,2>("gcm_num_rain");
 
     int nz   = dm.get_dimension_size("z"   );
     int ny   = dm.get_dimension_size("y"   );
@@ -46,16 +62,39 @@ namespace modules {
     real2d colavg_rho_d("colavg_rho_d",nz,nens);
     real2d colavg_uvel ("colavg_uvel" ,nz,nens);
     real2d colavg_vvel ("colavg_vvel" ,nz,nens);
-    real2d colavg_temp ("colavg_temp" ,nz,nens);
-    real2d colavg_rho_v("colavg_rho_v",nz,nens);
+    // #ifdef MMF_PAM_FORCE_ALL_WATER_SPECIES
+    // real2d colavg_temp ("colavg_temp" ,nz,nens);
+    // real2d colavg_rho_v("colavg_rho_v",nz,nens);
+    // real2d colavg_rho_l("colavg_rho_l",nz,nens);
+    // real2d colavg_rho_i("colavg_rho_i",nz,nens);
+    // #endif
+    // #ifdef MMF_PAM_FORCE_TOTAL_WATER
+    real2d colavg_temp_adj("colavg_temp_adj",nz,nens);
+    real2d colavg_rho_totq("colavg_rho_totq",nz,nens);
+    // #endif
+
+    real2d colavg_nc("colavg_nc",nz,nens);
+    real2d colavg_ni("colavg_ni",nz,nens);
+    real2d colavg_nr("colavg_nr",nz,nens);
 
     // We will be essentially reducing a summation to these variables, so initialize them to zero
     parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
       colavg_rho_d(k,iens) = 0;
       colavg_uvel (k,iens) = 0;
       colavg_vvel (k,iens) = 0;
-      colavg_temp (k,iens) = 0;
-      colavg_rho_v(k,iens) = 0;
+      // #ifdef MMF_PAM_FORCE_ALL_WATER_SPECIES
+      // colavg_temp (k,iens) = 0;
+      // colavg_rho_v(k,iens) = 0;
+      // colavg_rho_l(k,iens) = 0;
+      // colavg_rho_i(k,iens) = 0;
+      // #endif
+      // #ifdef MMF_PAM_FORCE_TOTAL_WATER
+      colavg_temp_adj(k,iens) = 0;
+      colavg_rho_totq(k,iens) = 0;
+      // #endif
+      colavg_nc(k,iens) = 0;
+      colavg_ni(k,iens) = 0;
+      colavg_nr(k,iens) = 0;
     });
 
     real r_nx_ny  = 1._fp / (nx*ny);  // precompute reciprocal to avoid costly divisions
@@ -64,23 +103,40 @@ namespace modules {
       atomicAdd( colavg_rho_d(k,iens) , rho_d(k,j,i,iens) * r_nx_ny );
       atomicAdd( colavg_uvel (k,iens) , uvel (k,j,i,iens) * r_nx_ny );
       atomicAdd( colavg_vvel (k,iens) , vvel (k,j,i,iens) * r_nx_ny );
-      atomicAdd( colavg_temp (k,iens) , temp (k,j,i,iens) * r_nx_ny );
-      atomicAdd( colavg_rho_v(k,iens) , rho_v(k,j,i,iens) * r_nx_ny );
+      // #ifdef MMF_PAM_FORCE_ALL_WATER_SPECIES
+      // atomicAdd( colavg_temp (k,iens) , temp (k,j,i,iens) * r_nx_ny );
+      // atomicAdd( colavg_rho_v(k,iens) , rho_v(k,j,i,iens) * r_nx_ny );
+      // atomicAdd( colavg_rho_l(k,iens) , rho_l(k,j,i,iens) * r_nx_ny );
+      // atomicAdd( colavg_rho_i(k,iens) , rho_i(k,j,i,iens) * r_nx_ny );
+      // #endif
+      // #ifdef MMF_PAM_FORCE_TOTAL_WATER
+      real rho_total_water = rho_v(k,j,i,iens) + rho_l(k,j,i,iens) + rho_i(k,j,i,iens);
+      real ql_tmp          = rho_l(k,j,i,iens) / ( rho_d(k,j,i,iens) + rho_v(k,j,i,iens) );
+      real qi_tmp          = rho_i(k,j,i,iens) / ( rho_d(k,j,i,iens) + rho_v(k,j,i,iens) );
+      real liq_adj         = ql_tmp* Lv     / cp_d;
+      real ice_adj         = qi_tmp*(Lv+Lf) / cp_d;
+      real temp_adj        = temp (k,j,i,iens) - liq_adj - ice_adj;
+      atomicAdd( colavg_temp_adj(k,iens) , temp_adj        * r_nx_ny );
+      atomicAdd( colavg_rho_totq(k,iens) , rho_total_water * r_nx_ny );
+      // #endif
+      atomicAdd( colavg_nc(k,iens), crm_nc(k,j,i,iens) * r_nx_ny );
+      atomicAdd( colavg_ni(k,iens), crm_ni(k,j,i,iens) * r_nx_ny );
+      atomicAdd( colavg_nr(k,iens), crm_nr(k,j,i,iens) * r_nx_ny );
     });
 
     // We need the GCM forcing tendencies later, so store these in the coupler's data manager
     // If they've already been registered, the do not register them again
     if (! dm.entry_exists("gcm_forcing_tend_uvel")) {
-      dm.register_and_allocate<real>( "gcm_forcing_tend_rho_d" , "GCM forcing tendency for dry density"         ,
-                                      {nz,nens} , {"z","nens"} );
-      dm.register_and_allocate<real>( "gcm_forcing_tend_uvel"  , "GCM forcing tendency for u-velocity"          ,
-                                      {nz,nens} , {"z","nens"} );
-      dm.register_and_allocate<real>( "gcm_forcing_tend_vvel"  , "GCM forcing tendency for v-velocity"          ,
-                                      {nz,nens} , {"z","nens"} );
-      dm.register_and_allocate<real>( "gcm_forcing_tend_temp"  , "GCM forcing tendency for temperature"         ,
-                                      {nz,nens} , {"z","nens"} );
-      dm.register_and_allocate<real>( "gcm_forcing_tend_rho_v" , "GCM forcing tendency for water vapor density" ,
-                                      {nz,nens} , {"z","nens"} );
+      dm.register_and_allocate<real>( "gcm_forcing_tend_rho_d" , "GCM forcing for dry density"        ,{nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_uvel"  , "GCM forcing for u-velocity"         ,{nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_vvel"  , "GCM forcing for v-velocity"         ,{nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_temp"  , "GCM forcing for temperature"        ,{nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_rho_v" , "GCM forcing for water vapor density",{nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_rho_l" , "GCM forcing for cloud water density",{nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_rho_i" , "GCM forcing for cloud ice density",  {nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_nc"    , "GCM forcing for liq number",  {nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_ni"    , "GCM forcing for ice number",  {nz,nens},{"z","nens"});
+      dm.register_and_allocate<real>( "gcm_forcing_tend_nr"    , "GCM forcing for rain number", {nz,nens},{"z","nens"});
     }
 
     // Retrieve the GCM forcing tendency arrays so we can set them with the time tencencies
@@ -89,20 +145,118 @@ namespace modules {
     auto gcm_forcing_tend_vvel  = dm.get<real,2>("gcm_forcing_tend_vvel" );
     auto gcm_forcing_tend_temp  = dm.get<real,2>("gcm_forcing_tend_temp" );
     auto gcm_forcing_tend_rho_v = dm.get<real,2>("gcm_forcing_tend_rho_v");
+    auto gcm_forcing_tend_rho_l = dm.get<real,2>("gcm_forcing_tend_rho_l");
+    auto gcm_forcing_tend_rho_i = dm.get<real,2>("gcm_forcing_tend_rho_i");
+    auto gcm_forcing_tend_nc    = dm.get<real,2>("gcm_forcing_tend_nc");
+    auto gcm_forcing_tend_ni    = dm.get<real,2>("gcm_forcing_tend_ni");
+    auto gcm_forcing_tend_nr    = dm.get<real,2>("gcm_forcing_tend_nr");
 
     real r_dt_gcm = 1._fp / dt_gcm;  // precompute reciprocal to avoid costly divisions
-    // The forcing is the difference between the input GCM state and the current colavg'd CRM state divided by the
-    //    GCM physics time step to be evenly distributed over the course of the MMF calculations for this step
+    // The forcing is the difference between the input GCM state and the current
+    // colavg'd CRM state divided by the GCM physics time step to be evenly
+    // distributed over the course of the CRM steps for the current GCM step
     parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
       gcm_forcing_tend_rho_d(k,iens) = ( rho_d_gcm(k,iens) - colavg_rho_d(k,iens) ) * r_dt_gcm;
       gcm_forcing_tend_uvel (k,iens) = ( uvel_gcm (k,iens) - colavg_uvel (k,iens) ) * r_dt_gcm;
       gcm_forcing_tend_vvel (k,iens) = ( vvel_gcm (k,iens) - colavg_vvel (k,iens) ) * r_dt_gcm;
-      gcm_forcing_tend_temp (k,iens) = ( temp_gcm (k,iens) - colavg_temp (k,iens) ) * r_dt_gcm;
-      gcm_forcing_tend_rho_v(k,iens) = ( rho_v_gcm(k,iens) - colavg_rho_v(k,iens) ) * r_dt_gcm;
+      // #ifdef MMF_PAM_FORCE_ALL_WATER_SPECIES
+      // gcm_forcing_tend_temp (k,iens) = ( temp_gcm (k,iens) - colavg_temp (k,iens) ) * r_dt_gcm;
+      // gcm_forcing_tend_rho_v(k,iens) = ( rho_v_gcm(k,iens) - colavg_rho_v(k,iens) ) * r_dt_gcm;
+      // gcm_forcing_tend_rho_l(k,iens) = ( rho_l_gcm(k,iens) - colavg_rho_l(k,iens) ) * r_dt_gcm;
+      // gcm_forcing_tend_rho_i(k,iens) = ( rho_i_gcm(k,iens) - colavg_rho_i(k,iens) ) * r_dt_gcm;
+      // #endif
+      // #ifdef MMF_PAM_FORCE_TOTAL_WATER
+      real gcm_rho_totq = rho_v_gcm(k,iens) + rho_l_gcm(k,iens) + rho_i_gcm(k,iens);
+      real gcm_ql_tmp   = rho_l_gcm(k,iens) / ( rho_d_gcm(k,iens) + rho_v_gcm(k,iens) );
+      real gcm_qi_tmp   = rho_i_gcm(k,iens) / ( rho_d_gcm(k,iens) + rho_v_gcm(k,iens) );
+      real gcm_liq_adj  = gcm_ql_tmp* Lv     / cp_d;
+      real gcm_ice_adj  = gcm_qi_tmp*(Lv+Lf) / cp_d;
+      real gsm_temp_adj = temp_gcm (k,iens) - gcm_liq_adj - gcm_ice_adj;
+      gcm_forcing_tend_temp (k,iens) = ( gsm_temp_adj - colavg_temp_adj(k,iens) ) * r_dt_gcm;
+      gcm_forcing_tend_rho_v(k,iens) = ( gcm_rho_totq - colavg_rho_totq(k,iens) ) * r_dt_gcm;
+      gcm_forcing_tend_rho_l(k,iens) = 0;
+      gcm_forcing_tend_rho_i(k,iens) = 0;
+      // #endif
+      gcm_forcing_tend_nc(k,iens) = ( gcm_nc(k,iens) - colavg_nc(k,iens) ) * r_dt_gcm;
+      gcm_forcing_tend_ni(k,iens) = ( gcm_ni(k,iens) - colavg_ni(k,iens) ) * r_dt_gcm;
+      gcm_forcing_tend_nr(k,iens) = ( gcm_nr(k,iens) - colavg_nr(k,iens) ) * r_dt_gcm;
     });
   }
 
+  // generalized hole filling routine to simplify apply_gcm_forcing_tendencies()
+  inline void fill_holes( pam::PamCoupler &coupler, real2d &rho_x_neg_mass, std::string tracer_name ) {
+    using yakl::c::parallel_for;
+    using yakl::c::SimpleBounds;
+    using yakl::atomicAdd;
+    using yakl::ScalarLiveOut;
+    using yakl::max;
+    auto &dm = coupler.get_data_manager_device_readwrite();
+    auto dt = coupler.get_option<real>("crm_dt");
+    int nz   = dm.get_dimension_size("z"   );
+    int ny   = dm.get_dimension_size("y"   );
+    int nx   = dm.get_dimension_size("x"   );
+    int nens = dm.get_dimension_size("nens");
+    auto dz  = dm.get<real,2>("vertical_cell_dz");
+
+    auto rho_x = dm.get<real,4>( tracer_name );
+
+    // We need these arrays for multiplicative hole filling. Holes are only filled 
+    // inside vertical columns at first because it leads to a very infrequent 
+    // collision rate for atomidAdd() operations, reducing the runtime.
+    real2d rho_x_pos_mass("rho_v_pos_mass",nz,nens);
+
+    // initialize rho_x_pos_mass to zero
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
+      rho_x_pos_mass(k,iens) = 0;
+    });
 
+    // Calculate available positive mass for hole filling at each vertical level 
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      if (rho_x(k,j,i,iens) > 0) atomicAdd( rho_x_pos_mass(k,iens) ,  rho_x(k,j,i,iens)*dz(k,iens) );
+    });
+
+    // The negative is too large if the mass added to fill in negative values is greater than the available mass
+    ScalarLiveOut<bool> neg_too_large(false);
+
+    // Correct for added mass by taking away mass from positive-valued cells proportional to the mass in that cell
+    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+      // Determine if mass added to negatives is too large to compensate for in this vertical level
+      if (i == 0 && j == 0) {
+        if (rho_x_neg_mass(k,iens) > rho_x_pos_mass(k,iens)) neg_too_large = true;
+      }
+      // Subtract mass proportional to this cells' portion of the total mass at the vertical level
+      real factor = rho_x(k,j,i,iens)*dz(k,iens) / rho_x_pos_mass(k,iens);
+      rho_x(k,j,i,iens) = max( 0._fp , rho_x(k,j,i,iens) - (rho_x_neg_mass(k,iens) * factor)/dz(k,iens) );
+    });
+
+    if (neg_too_large.hostRead()) {
+      // If negative mass was too large, let's expand the domain of hole filling to the entire CRM
+      // This will have a large contention rate for atomicAdd() and will be less efficient than before
+      real1d rho_x_neg_mass_glob("rho_x_neg_mass_glob",nens);
+      real1d rho_x_pos_mass_glob("rho_x_pos_mass_glob",nens);
+
+      // These are essentially reductions, so initialize to zero
+      parallel_for( YAKL_AUTO_LABEL() , nens , YAKL_LAMBDA (int iens) {
+        rho_x_neg_mass_glob(iens) = 0;
+        rho_x_pos_mass_glob(iens) = 0;
+      });
+
+      // Compute the amount of negative mass we need to compensate for 
+      // as well as how much positive mass we still have
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        if (i == 0 && j == 0) atomicAdd( rho_x_neg_mass_glob(iens) , max(0._fp,rho_x_neg_mass(k,iens)-rho_x_pos_mass(k,iens)) );
+        atomicAdd( rho_x_pos_mass_glob(iens) , rho_x(k,j,i,iens)*dz(k,iens) );
+      });
+
+      // Remove mass proportionally to the mass in a given cell
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
+        real factor = rho_x(k,j,i,iens)*dz(k,iens) / rho_x_pos_mass_glob(iens);
+        rho_x(k,j,i,iens) = max( 0._fp , rho_x(k,j,i,iens) - (rho_x_neg_mass_glob(iens) * factor)/dz(k,iens) );
+      });
+      
+    } // if (neg_too_large.hostRead()) {
+
+  }
 
 
   // This routine is intended to be called frequently throughout the MMF calculation
@@ -119,8 +273,6 @@ namespace modules {
     using yakl::c::parallel_for;
     using yakl::c::SimpleBounds;
     using yakl::atomicAdd;
-    using yakl::ScalarLiveOut;
-    using yakl::max;
     auto &dm = coupler.get_data_manager_device_readwrite();
 
     auto dt = coupler.get_option<real>("crm_dt");
@@ -137,6 +289,11 @@ namespace modules {
     auto vvel  = dm.get<real,4>( "vvel"        );
     auto temp  = dm.get<real,4>( "temp"        );
     auto rho_v = dm.get<real,4>( "water_vapor" );
+    auto rho_l = dm.get<real,4>( "cloud_water" );
+    auto rho_i = dm.get<real,4>( "ice"         );
+    auto nc    = dm.get<real,4>("cloud_water_num");
+    auto ni    = dm.get<real,4>("ice_num");
+    auto nr    = dm.get<real,4>("rain_num");
 
     // GCM forcing tendencies for the average CRM column state
     auto gcm_forcing_tend_rho_d = dm.get<real const,2>("gcm_forcing_tend_rho_d");
@@ -144,17 +301,30 @@ namespace modules {
     auto gcm_forcing_tend_vvel  = dm.get<real const,2>("gcm_forcing_tend_vvel" );
     auto gcm_forcing_tend_temp  = dm.get<real const,2>("gcm_forcing_tend_temp" );
     auto gcm_forcing_tend_rho_v = dm.get<real const,2>("gcm_forcing_tend_rho_v");
+    auto gcm_forcing_tend_rho_l = dm.get<real const,2>("gcm_forcing_tend_rho_l");
+    auto gcm_forcing_tend_rho_i = dm.get<real const,2>("gcm_forcing_tend_rho_i");
+    auto gcm_forcing_tend_nc    = dm.get<real,2>("gcm_forcing_tend_nc");
+    auto gcm_forcing_tend_ni    = dm.get<real,2>("gcm_forcing_tend_ni");
+    auto gcm_forcing_tend_nr    = dm.get<real,2>("gcm_forcing_tend_nr");
 
     // We need these arrays for multiplicative hole filling
     // Holes are only filled inside vertical columns at first because it leads to a very infrequent collision
     //    rate for atomidAdd() operations, reducing the runtime of the operations.
     real2d rho_v_neg_mass("rho_v_neg_mass",nz,nens);
     real2d rho_v_pos_mass("rho_v_pos_mass",nz,nens);
+    real2d rho_l_neg_mass("rho_l_neg_mass",nz,nens);
+    real2d rho_l_pos_mass("rho_l_pos_mass",nz,nens);
+    real2d rho_i_neg_mass("rho_i_neg_mass",nz,nens);
+    real2d rho_i_pos_mass("rho_i_pos_mass",nz,nens);
 
     // These are essentially reductions, so initialize to zero
     parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
       rho_v_neg_mass(k,iens) = 0;
       rho_v_pos_mass(k,iens) = 0;
+      rho_l_neg_mass(k,iens) = 0;
+      rho_l_pos_mass(k,iens) = 0;
+      rho_i_neg_mass(k,iens) = 0;
+      rho_i_pos_mass(k,iens) = 0;
     });
 
     // Apply the GCM forcing, and keep track of negative mass we had to fill and available positive mass
@@ -168,64 +338,41 @@ namespace modules {
       vvel (k,j,i,iens) += gcm_forcing_tend_vvel (k,iens) * dt;
       temp (k,j,i,iens) += gcm_forcing_tend_temp (k,iens) * dt;
       rho_v(k,j,i,iens) += gcm_forcing_tend_rho_v(k,iens) * dt;
+      // #ifdef MMF_PAM_FORCE_ALL_WATER_SPECIES
+      // rho_l(k,j,i,iens) += gcm_forcing_tend_rho_l(k,iens) * dt;
+      // rho_i(k,j,i,iens) += gcm_forcing_tend_rho_i(k,iens) * dt;
+      // #endif
       // Compute negative and positive mass for rho_v, and set negative masses to zero (essentially adding mass)
       if (rho_v(k,j,i,iens) < 0) {
         atomicAdd( rho_v_neg_mass(k,iens) , -rho_v(k,j,i,iens)*dz(k,iens) );
         rho_v(k,j,i,iens) = 0;
       }
+      // #ifdef MMF_PAM_FORCE_ALL_WATER_SPECIES
+      // if (rho_l(k,j,i,iens) < 0) {
+      //   atomicAdd( rho_l_neg_mass(k,iens) , -rho_l(k,j,i,iens)*dz(k,iens) );
+      //   rho_l(k,j,i,iens) = 0;
+      // }
+      // if (rho_i(k,j,i,iens) < 0) {
+      //   atomicAdd( rho_i_neg_mass(k,iens) , -rho_i(k,j,i,iens)*dz(k,iens) );
+      //   rho_i(k,j,i,iens) = 0;
+      // }
+      // #endif
+      nc(k,j,i,iens) += gcm_forcing_tend_nc(k,iens) * dt;
+      ni(k,j,i,iens) += gcm_forcing_tend_ni(k,iens) * dt;
+      nr(k,j,i,iens) += gcm_forcing_tend_nr(k,iens) * dt;
+
+      if (nc(k,j,i,iens) < 0) { nc(k,j,i,iens) = 0; }
+      if (ni(k,j,i,iens) < 0) { ni(k,j,i,iens) = 0; }
+      if (nr(k,j,i,iens) < 0) { nr(k,j,i,iens) = 0; }
     });
 
     // Only do the hole filing if there's negative mass
-    if (yakl::intrinsics::sum(rho_v_neg_mass) > 0) {
-      // Calculate available positive mass for hole filling at each vertical level 
-      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-        if (rho_v(k,j,i,iens) > 0) atomicAdd( rho_v_pos_mass(k,iens) ,  rho_v(k,j,i,iens)*dz(k,iens) );
-      });
-
-      // The negative is too large if the mass added to fill in negative values is greater than the available mass at
-      //   a vertical level. 
-      ScalarLiveOut<bool> neg_too_large(false);
+    if (yakl::intrinsics::sum(rho_v_neg_mass) > 0) { fill_holes(coupler, rho_v_neg_mass,"water_vapor"); }
+    // #ifdef MMF_PAM_FORCE_ALL_WATER_SPECIES
+    // if (yakl::intrinsics::sum(rho_l_neg_mass) > 0) { fill_holes(coupler, rho_l_neg_mass, "cloud_water"); }
+    // if (yakl::intrinsics::sum(rho_i_neg_mass) > 0) { fill_holes(coupler, rho_i_neg_mass, "ice");         }
+    // #endif
 
-      // Correct for added mass by taking away mass from positive-valued cells proportional to the mass in that cell
-      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-        // Determine if mass added to negatives is too large to compensate for in this vertical level
-        if (i == 0 && j == 0) {
-          if (rho_v_neg_mass(k,iens) > rho_v_pos_mass(k,iens)) neg_too_large = true;
-        }
-        // Subtract mass proportional to this cells' portion of the total mass at the vertical level
-        real factor = rho_v(k,j,i,iens)*dz(k,iens) / rho_v_pos_mass(k,iens);
-        rho_v(k,j,i,iens) = max( 0._fp , rho_v(k,j,i,iens) - (rho_v_neg_mass(k,iens) * factor)/dz(k,iens) );
-      });
-
-      if (neg_too_large.hostRead()) {
-        // If negative mass was too large, let's expand the domain of hole filling to the entire CRM
-        // This will have a large contention rate for atomicAdd() and will be less efficient than before
-        real1d rho_v_neg_mass_glob("rho_v_neg_mass_glob",nens);
-        real1d rho_v_pos_mass_glob("rho_v_pos_mass_glob",nens);
-
-        // These are essentially reductions, so initialize to zero
-        parallel_for( YAKL_AUTO_LABEL() , nens , YAKL_LAMBDA (int iens) {
-          rho_v_neg_mass_glob(iens) = 0;
-          rho_v_pos_mass_glob(iens) = 0;
-        });
-
-        // Compute the amount of negative mass we still need to compensate for as well as how much positive mass
-        //   we still have
-        parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-          if (i == 0 && j == 0) atomicAdd( rho_v_neg_mass_glob(iens) , max(0._fp,rho_v_neg_mass(k,iens)-rho_v_pos_mass(k,iens)) );
-          atomicAdd( rho_v_pos_mass_glob(iens) , rho_v(k,j,i,iens)*dz(k,iens) );
-        });
-
-        // Remove mass proportionally to the mass in a given cell
-        parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-          real factor = rho_v(k,j,i,iens)*dz(k,iens) / rho_v_pos_mass_glob(iens);
-          rho_v(k,j,i,iens) = max( 0._fp , rho_v(k,j,i,iens) - (rho_v_neg_mass_glob(iens) * factor)/dz(k,iens) );
-        });
-        
-      } // if (neg_too_large.hostRead()) {
-    } // if (yakl::intrinsics::sum(rho_v_neg_mass) > 0) {
   } // apply_gcm_forcing_tendencies
 
 }
-
-
diff --git a/pam_core/modules/perturb_temperature.h b/pam_core/modules/perturb_temperature.h
index bc437a11..9fe81fb2 100644
--- a/pam_core/modules/perturb_temperature.h
+++ b/pam_core/modules/perturb_temperature.h
@@ -7,9 +7,10 @@
 namespace modules {
 
   // It is best if id is globally unique for each batch of CRMs
-  inline void perturb_temperature( pam::PamCoupler &coupler , intConst1d id , real magnitude = 1. ) {
+  inline void perturb_temperature( pam::PamCoupler &coupler , intConst1d id , real magnitude = 0.1 ) {
     using yakl::c::parallel_for;
     using yakl::c::SimpleBounds;
+    using yakl::atomicAdd;
 
     int nz   = coupler.get_nz  ();
     int ny   = coupler.get_ny  ();
@@ -24,14 +25,41 @@ namespace modules {
     auto &dm = coupler.get_data_manager_device_readwrite();
     auto temp = dm.get<real,4>("temp");
 
-    parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<4>(num_levels,ny,nx,nens) ,
+    real2d temp_hmean1("temp_hmean1",nz,nens);
+    real2d temp_hmean2("temp_hmean1",nz,nens);
+    parallel_for( YAKL_AUTO_LABEL(),
+                  SimpleBounds<2>(nz,nens),
+                  YAKL_LAMBDA (int k, int iens) {
+      temp_hmean1(k,iens) = 0;
+      temp_hmean2(k,iens) = 0;
+    });
+
+    real r_nx_ny  = 1._fp/(nx*ny);  // precompute reciprocal to avoid costly divisions
+    parallel_for( YAKL_AUTO_LABEL(), SimpleBounds<4>(nz,ny,nx,nens), 
+                  YAKL_LAMBDA (int k, int j, int i, int iens) {
+      atomicAdd( temp_hmean1(k,iens), temp(k,j,i,iens) * r_nx_ny );
+    });
+
+    parallel_for( YAKL_AUTO_LABEL(),
+                  SimpleBounds<4>(num_levels,ny,nx,nens),
                   YAKL_LAMBDA (int k, int j, int i, int iens) {
       int seed = id(iens)*num_levels*ny*nx + k*ny*nx + j*nx + i;
-      yakl::Random prng(seed);
+      yakl::Random prng(seed*1e3);
       real rand = prng.genFP<real>()*2._fp - 1._fp;  // Random number in [-1,1]
+      rand = std::min( rand, 1.0 );
+      rand = std::max( rand,-1.0 );
       real scaling = ( num_levels - static_cast<real>(k) ) / num_levels;  // Less effect at higher levels
       temp(k,j,i,iens) += rand * magnitude * scaling;
+      atomicAdd( temp_hmean2(k,iens), temp(k,j,i,iens) * r_nx_ny );
     });
+
+    // energy conservation scaling
+    parallel_for( YAKL_AUTO_LABEL(),
+                  SimpleBounds<4>(num_levels,ny,nx,nens),
+                  YAKL_LAMBDA (int k, int j, int i, int iens) {
+      temp(k,j,i,iens) = temp(k,j,i,iens) * temp_hmean1(k,iens) / temp_hmean2(k,iens);
+    });
+
   }
 
 }
diff --git a/pam_core/modules/sponge_layer.h b/pam_core/modules/sponge_layer.h
index 29c03354..b2bfdb75 100644
--- a/pam_core/modules/sponge_layer.h
+++ b/pam_core/modules/sponge_layer.h
@@ -26,7 +26,7 @@ namespace modules {
     // set relaxation timescale
     real time_scale;
     if (coupler.option_exists("sponge_time_scale")) {
-      time_scale = coupler.get_option<int>("sponge_time_scale");
+      time_scale = coupler.get_option<real>("sponge_time_scale");
     } else {
       time_scale = 60;
     };
diff --git a/pam_core/modules/surface_friction.h b/pam_core/modules/surface_friction.h
index bb0b907b..935ce0bd 100644
--- a/pam_core/modules/surface_friction.h
+++ b/pam_core/modules/surface_friction.h
@@ -6,16 +6,17 @@
 namespace modules {
 
   real constexpr vonk = 0.4;
-  real constexpr eps = 1.0e-10;
-  real constexpr am = 4.8;
-  real constexpr bm = 19.3;
-  real constexpr pi = 3.14159;
+  real constexpr eps  = 1.0e-10;
+  real constexpr am   = 4.8;
+  real constexpr bm   = 19.3;
+  real constexpr pi   = 3.14159;
 
   // estimate roughness height for momentum
+  // original fortran code written by Marat Khairoutdinov (2004)
   YAKL_INLINE void z0_est(real z, real bflx, real wnd, real ustar, real &z0) {
     real c1 = pi/2.0 - 3.0*log(2.0);
     real rlmo = -bflx*vonk/(ustar*ustar*ustar+eps);
-    real zeta = min(1.0,z*rlmo);
+    real zeta = std::min(1.0,z*rlmo);
     real x;
     real psi1;
     if(zeta >= 0.0) {
@@ -25,12 +26,21 @@ namespace modules {
       x = sqrt(sqrt(1.0-bm*zeta));
       psi1 = 2.0*log(1.0+x) + log(1.0+x*x) -2.0*atan(x) + c1;
     }
-    real lnz = max(0.0, vonk*wnd/(ustar+eps) +psi1);
+    real lnz = std::max(0.0, vonk*wnd/(ustar+eps) +psi1);
     z0 = z*exp(-lnz);
   }
 
 
   // calculate friction speed (sqrt of momentum flux) [m/s]
+  // returns value of ustar using the below
+  // similarity functions and a specified buoyancy flux (bflx) given in
+  // kinematic units
+  //   phi_m (zeta > 0) =  (1 + am * zeta)
+  //   phi_m (zeta < 0) =  (1 - bm * zeta)^(-1/4)
+  // where zeta = z/lmo and lmo = (theta_rev/g*vonk) * (ustar^2/tstar)
+  // Ref: Businger, 1973, Turbulent Transfer in the Atmospheric Surface
+  // Layer, in Workshop on Micormeteorology, pages 67-100.
+  // Original fortran code written by Bjorn Stevens (March, 1999)
   YAKL_INLINE real diag_ustar(real z, real bflx, real wnd, real z0) {
     real lnz = log(z/z0);
     real klnz = vonk/lnz;
@@ -39,7 +49,7 @@ namespace modules {
     if (bflx != 0.0) {
       for (int iterate = 0; iterate < 8; iterate++) {
         real rlmo = -bflx * vonk/(ustar*ustar*ustar + eps);
-        real zeta = min(1.0,z*rlmo);
+        real zeta = std::min(1.0,z*rlmo);
         if (zeta>0.0) {
           ustar = vonk*wnd / (lnz+am*zeta);
         } else {
@@ -61,30 +71,35 @@ namespace modules {
     auto ny   = coupler.get_ny  ();
     auto nx   = coupler.get_nx  ();
     auto nens = coupler.get_nens();
-    auto &dm = coupler.get_data_manager_device_readwrite();
-    dm.register_and_allocate<real>( "z0"      , "Momentum roughness height [m]",     {nens},{"nens"} );
+    auto &dm  = coupler.get_data_manager_device_readwrite();
+    dm.register_and_allocate<real>( "z0"      , "Momentum roughness height [m]",         {nens},{"nens"} );
     dm.register_and_allocate<real>( "sfc_bflx", "large-scale sfc buoyancy flux [K m/s]", {nens},{"nens"} );
-    auto z0       = dm.get<real,1>("z0");
-    auto rho_d    = dm.get<real,4>("density_dry");
-    auto rho_v    = dm.get<real,4>("water_vapor");
-    auto zmid     = dm.get<real,2>("vertical_midpoint_height" );
-    auto sfc_bflx = dm.get<real,1>("sfc_bflx");
-    auto gcm_uvel = dm.get<real,2>("gcm_uvel");
-    auto gcm_vvel = dm.get<real,2>("gcm_vvel");
+    auto z0            = dm.get<real,1>("z0");
+    auto rho_d         = dm.get<real,4>("density_dry");
+    auto rho_v         = dm.get<real,4>("water_vapor");
+    auto zmid          = dm.get<real,2>("vertical_midpoint_height" );
+    auto sfc_bflx      = dm.get<real,1>("sfc_bflx");
+    auto gcm_uvel      = dm.get<real,2>("gcm_uvel");
+    auto gcm_vvel      = dm.get<real,2>("gcm_vvel");
+    auto sfc_mom_flx_u = dm.get<real,3>("sfc_mom_flx_u" ); // momentum fluxes applied in SGS scheme
+    auto sfc_mom_flx_v = dm.get<real,3>("sfc_mom_flx_v" ); // momentum fluxes applied in SGS scheme
 
+    // compute horizontal mean density and initialize momentum fluxes
     real1d rho_horz_mean("rho_horz_mean",nens);
     real r_nx_ny  = 1._fp / (nx*ny);  // precompute reciprocal to avoid costly divisions
     parallel_for("compute horz mean wind", SimpleBounds<3>(ny,nx,nens), YAKL_LAMBDA (int j, int i, int iens) {
       atomicAdd( rho_horz_mean(iens), ( rho_d(0,j,i,iens) + rho_v(0,j,i,iens) ) * r_nx_ny );
+      sfc_mom_flx_u(j,i,iens) = 0;
+      sfc_mom_flx_v(j,i,iens) = 0;
     });
 
     parallel_for( nens , YAKL_LAMBDA (int iens) {
       sfc_bflx(iens) = bflx_in(iens);
-      real wnd_spd = max( 1.0, sqrt( gcm_uvel(0,iens)*gcm_uvel(0,iens) 
-                                    +gcm_vvel(0,iens)*gcm_vvel(0,iens) ) );
+      real wnd_spd = std::max( 1.0, sqrt( gcm_uvel(0,iens)*gcm_uvel(0,iens) 
+                                         +gcm_vvel(0,iens)*gcm_vvel(0,iens) ) );
       real ustar = sqrt( tau_in(iens) / rho_horz_mean(iens) );
       z0_est( zmid(0,iens), sfc_bflx(iens), wnd_spd, ustar, z0(iens));
-      z0(iens) = max(0.00001,min(1.0,z0(iens)));
+      z0(iens) = std::max(0.00001,std::min(1.0,z0(iens)));
     });
   }
 
@@ -103,15 +118,25 @@ namespace modules {
     auto sfc_bflx      = dm.get<real,1>("sfc_bflx");       // surface buoyancy flux
     auto sfc_mom_flx_u = dm.get<real,3>("sfc_mom_flx_u" ); // momentum fluxes applied in SGS scheme
     auto sfc_mom_flx_v = dm.get<real,3>("sfc_mom_flx_v" ); // momentum fluxes applied in SGS scheme
-    auto zmid          = dm.get<real,2>("vertical_midpoint_height" );
-    auto rho_d         = dm.get<real,4>("density_dry");
-    auto rho_v         = dm.get<real,4>("water_vapor");
-    auto uvel          = dm.get<real,4>("uvel");
-    auto vvel          = dm.get<real,4>("vvel");
+    auto zmid          = dm.get<real const,2>("vertical_midpoint_height" );
+    auto zint          = dm.get<real const,2>("vertical_interface_height");
+    auto rho_d         = dm.get<real const,4>("density_dry");
+    auto rho_v         = dm.get<real const,4>("water_vapor");
+    auto uvel          = dm.get<real const,4>("uvel");
+    auto vvel          = dm.get<real const,4>("vvel");
 
     real1d u_horz_mean("u_horz_mean",nens);
     real1d v_horz_mean("v_horz_mean",nens);
     real1d rho_horz_mean("rho_horz_mean",nens);
+    
+    // initialize horizontal means
+    parallel_for("compute horz mean wind", SimpleBounds<3>(ny,nx,nens), YAKL_LAMBDA (int j, int i, int iens) {
+      u_horz_mean(iens)   = 0;
+      v_horz_mean(iens)   = 0;
+      rho_horz_mean(iens) = 0;
+    });
+
+    // calculate horizontal means
     real r_nx_ny  = 1._fp / (nx*ny);  // precompute reciprocal to avoid costly divisions
     parallel_for("compute horz mean wind", SimpleBounds<3>(ny,nx,nens), YAKL_LAMBDA (int j, int i, int iens) {
       atomicAdd( u_horz_mean(iens), uvel(0,j,i,iens) * r_nx_ny );
@@ -119,13 +144,25 @@ namespace modules {
       atomicAdd( rho_horz_mean(iens), ( rho_d(0,j,i,iens) + rho_v(0,j,i,iens) ) * r_nx_ny );
     });
 
-    bool sim2d = ny==1;
-    parallel_for( "surface momentum flux zero" , SimpleBounds<3>(ny,nx,nens) , YAKL_LAMBDA (int j, int i, int iens) {
-      real wnd_spd = max( 1.0, sqrt( uvel(0,j,i,iens)*uvel(0,j,i,iens) + vvel(0,j,i,iens)*vvel(0,j,i,iens) ) );
+    parallel_for( "surface momentum flux" , SimpleBounds<3>(ny,nx,nens) , YAKL_LAMBDA (int j, int i, int iens) {
+      // calculate surface momentum flux identical to SAM with units = [kg m/s2]
+      real u2 = uvel(0,j,i,iens)*uvel(0,j,i,iens);
+      real v2 = vvel(0,j,i,iens)*vvel(0,j,i,iens);
+      real wnd_spd = std::max( 1.0, sqrt(u2+v2) );
       real ustar = diag_ustar( zmid(0,iens), sfc_bflx(iens), wnd_spd, z0(iens)); 
       real tau00 = rho_horz_mean(iens) * ustar * ustar;
       sfc_mom_flx_u(j,i,iens) = -( uvel(0,j,i,iens) - u_horz_mean(iens) ) / wnd_spd * tau00;
       sfc_mom_flx_v(j,i,iens) = -( vvel(0,j,i,iens) - v_horz_mean(iens) ) / wnd_spd * tau00;
+      // convert units to [m2/s2] for SHOC - first interpolate to get density at surface interface
+      real rho_mid0 = rho_d(0,j,i,iens) + rho_v(0,j,i,iens);
+      real rho_mid1 = rho_d(1,j,i,iens) + rho_v(1,j,i,iens);
+      real rho_mid2 = rho_d(2,j,i,iens) + rho_v(2,j,i,iens);
+      real rho_int0 = (rho_mid0 + rho_mid1)/2;
+      real rho_int1 = (rho_mid1 + rho_mid2)/2;
+      real rho_sfc = 2.0*rho_int0 - rho_int1;
+      real dz = zint(1,iens) - zint(0,iens);
+      sfc_mom_flx_u(j,i,iens) = sfc_mom_flx_u(j,i,iens) * rho_sfc / dz ;
+      sfc_mom_flx_v(j,i,iens) = sfc_mom_flx_v(j,i,iens) * rho_sfc / dz ;
     });
   }
 
diff --git a/pam_core/pam_const.h b/pam_core/pam_const.h
index 07dde5c4..2db1dfe3 100644
--- a/pam_core/pam_const.h
+++ b/pam_core/pam_const.h
@@ -299,3 +299,36 @@ inline void validate_array_positive( yakl::Array<T,N,MEM,STYLE> const &arr) {
 }
 
 
+
+
+inline void debug_print( char const * file , int line ) {
+  std::cout << "*** DEBUG: " << file << ": " << line << std::endl;
+}
+
+template <class T> inline void debug_print_sum( T var , char const * file , int line , char const * varname ) {
+  std::cout << "*** DEBUG: " << file << ": " << line << ": sum(" << varname << ")  -->  " << yakl::intrinsics::sum( var ) << std::endl;
+}
+
+template <class T> inline void debug_print_avg( T var , char const * file , int line , char const * varname ) {
+  std::cout << "*** DEBUG: " << file << ": " << line << ": avg(" << varname << ")  -->  " << yakl::intrinsics::sum( var )/var.size() << std::endl;
+}
+
+template <class T> inline void debug_print_min( T var , char const * file , int line , char const * varname ) {
+  std::cout << "*** DEBUG: " << file << ": " << line << ": minval(" << varname << ")  -->  " << yakl::intrinsics::minval( var ) << std::endl;
+}
+
+template <class T> inline void debug_print_max( T var , char const * file , int line , char const * varname ) {
+  std::cout << "*** DEBUG: " << file << ": " << line << ": maxval(" << varname << ")  -->  " << yakl::intrinsics::maxval( var ) << std::endl;
+}
+
+template <class T> inline void debug_print_val( T var , char const * file , int line , char const * varname ) {
+  std::cout << "*** DEBUG: " << file << ": " << line << ": " << varname << "  -->  " << var << std::endl;
+}
+
+#define DEBUG_PRINT() { debug_print(__FILE__,__LINE__); }
+#define DEBUG_PRINT_SUM(var) { debug_print_sum((var),__FILE__,__LINE__,#var); }
+#define DEBUG_PRINT_AVG(var) { debug_print_avg((var),__FILE__,__LINE__,#var); }
+#define DEBUG_PRINT_MIN(var) { debug_print_min((var),__FILE__,__LINE__,#var); }
+#define DEBUG_PRINT_MAX(var) { debug_print_max((var),__FILE__,__LINE__,#var); }
+#define DEBUG_PRINT_VAL(var) { debug_print_val((var),__FILE__,__LINE__,#var); }
+
diff --git a/pam_core/pam_coupler.h b/pam_core/pam_coupler.h
index 175f77f7..56fee677 100644
--- a/pam_core/pam_coupler.h
+++ b/pam_core/pam_coupler.h
@@ -10,43 +10,6 @@
 
 namespace pam {
 
-
-
-  YAKL_INLINE real hydrostatic_pressure( realConst3d hy_params , real z_in , real z0 , real dz ,
-                                         int k, int iens             ) {
-    real z = ( z_in - z0 ) / dz;
-    real a0 = hy_params(k,0,iens);
-    real a1 = hy_params(k,1,iens);
-    real a2 = hy_params(k,2,iens);
-    real a3 = hy_params(k,3,iens);
-    real a4 = hy_params(k,4,iens);
-    real lnp = a0 + ( a1 + ( a2 + ( a3 + ( a4)*z)*z)*z)*z;
-    return exp(lnp);
-  }
-
-
-
-  YAKL_INLINE real hydrostatic_density( realConst3d hy_params , real z_in , real z0 , real dz ,
-                                        int k, int iens , real grav ) {
-    real z = ( z_in - z0 ) / dz;
-    real a1 = hy_params(k,1,iens);
-    real a2 = hy_params(k,2,iens);
-    real a3 = hy_params(k,3,iens);
-    real a4 = hy_params(k,4,iens);
-    real p = hydrostatic_pressure( hy_params , z_in , z0 , dz , k , iens );
-    real mult = a1 + (2*a2 + (3*a3 + (4*a4)*z)*z)*z;
-    real dpdz = mult*p/dz;
-    return -dpdz/grav;
-  }
-
-
-
-  YAKL_INLINE real compute_pressure( real rho_d, real rho_v, real T, real R_d, real R_v ) {
-    return rho_d*R_d*T + rho_v*R_v*T;
-  }
-
-
-
   class PamCoupler {
   protected:
 
@@ -151,6 +114,13 @@ namespace pam {
     }
 
 
+    template <class T>
+    T get_option( std::string key , T val_if_absent ) const {
+      if (option_exists(key)) return options.get_option<T>(key);
+      return val_if_absent;
+    }
+
+
     bool option_exists( std::string key ) const {
       return options.option_exists(key);
     }
@@ -232,7 +202,7 @@ namespace pam {
     }
 
 
-    
+
     void add_tracer( std::string tracer_name , std::string tracer_desc , bool positive , bool adds_mass ) {
       int nz   = get_nz  ();
       int ny   = get_ny  ();
@@ -247,7 +217,7 @@ namespace pam {
     int get_num_tracers() const { return tracers.size(); }
 
 
-    
+
     std::vector<std::string> get_tracer_names() const {
       std::vector<std::string> ret;
       for (int i=0; i < tracers.size(); i++) { ret.push_back( tracers[i].name ); }
@@ -255,7 +225,7 @@ namespace pam {
     }
 
 
-    
+
     void get_tracer_info(std::string tracer_name , std::string &tracer_desc, bool &tracer_found ,
                          bool &positive , bool &adds_mass) const {
       std::vector<std::string> ret;
@@ -272,7 +242,7 @@ namespace pam {
     }
 
 
-    
+
     bool tracer_exists( std::string tracer_name ) const {
       for (int i=0; i < tracers.size(); i++) {
         if (tracer_name == tracers[i].name) return true;
@@ -294,13 +264,33 @@ namespace pam {
       dm.register_and_allocate<real>("vertical_interface_height","vertical interface height"  ,{nz+1    ,nens},{"zp1"      ,"nens"});
       dm.register_and_allocate<real>("vertical_cell_dz"         ,"vertical grid spacing"      ,{nz      ,nens},{"z"        ,"nens"});
       dm.register_and_allocate<real>("vertical_midpoint_height" ,"vertical midpoint height"   ,{nz      ,nens},{"z"        ,"nens"});
-      dm.register_and_allocate<real>("hydrostasis_parameters"   ,"hydrostasis parameters"     ,{nz,5    ,nens},{"z","nhy"  ,"nens"});
-      dm.register_and_allocate<real>("gcm_density_dry"          ,"GCM column dry density"     ,{nz      ,nens},{"z"        ,"nens"});
-      dm.register_and_allocate<real>("gcm_uvel"                 ,"GCM column u-velocity"      ,{nz      ,nens},{"z"        ,"nens"});
-      dm.register_and_allocate<real>("gcm_vvel"                 ,"GCM column v-velocity"      ,{nz      ,nens},{"z"        ,"nens"});
-      dm.register_and_allocate<real>("gcm_wvel"                 ,"GCM column w-velocity"      ,{nz      ,nens},{"z"        ,"nens"});
-      dm.register_and_allocate<real>("gcm_temp"                 ,"GCM column temperature"     ,{nz      ,nens},{"z"        ,"nens"});
-      dm.register_and_allocate<real>("gcm_water_vapor"          ,"GCM column water vapor mass",{nz      ,nens},{"z"        ,"nens"});
+
+      dm.register_and_allocate<real>("gcm_pressure_int","GCM column interface pressure"             ,{nz+1,nens},{"zp1","nens"});
+      dm.register_and_allocate<real>("gcm_pressure_mid","GCM column midpoint pressure"              ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_density_dry" ,"GCM column dry density"                    ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_uvel"        ,"GCM column u-velocity"                     ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_vvel"        ,"GCM column v-velocity"                     ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_wvel"        ,"GCM column w-velocity"                     ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_temp"        ,"GCM column temperature"                    ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_water_vapor" ,"GCM column water vapor density"            ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_cloud_water" ,"GCM column cloud water density"            ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_cloud_ice"   ,"GCM column cloud ice density"              ,{nz  ,nens},{"z"  ,"nens"});
+
+      dm.register_and_allocate<real>("gcm_num_liq"   ,"GCM column cloud liq number"              ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_num_ice"   ,"GCM column cloud ice number"              ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("gcm_num_rain"  ,"GCM column liq rain number"               ,{nz  ,nens},{"z"  ,"nens"});
+
+      dm.register_and_allocate<real>("ref_presi"        ,"Reference state column interface pressure" ,{nz+1,nens},{"zp1","nens"});
+      dm.register_and_allocate<real>("ref_pres"         ,"Reference state column mid-point pressure" ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("ref_density_dry"  ,"Reference state column dry density"        ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("ref_density_vapor","Reference state column water vapor density",{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("ref_density_liq"  ,"Reference state column water liq density"  ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("ref_density_ice"  ,"Reference state column water ice density"  ,{nz  ,nens},{"z"  ,"nens"});
+      dm.register_and_allocate<real>("ref_temp"         ,"Reference state column temperature"        ,{nz  ,nens},{"z"  ,"nens"});
+
+      dm.register_and_allocate<real>("uvel_stag"                     ,"staggered x-direction velocity"       ,{nz,ny,nx+1,nens},{"z","y","xp1","nens"});
+      dm.register_and_allocate<real>("vvel_stag"                     ,"staggered y-direction velocity"       ,{nz,ny+1,nx,nens},{"z","yp1","x","nens"});
+      dm.register_and_allocate<real>("wvel_stag"                     ,"staggered z-direction velocity"       ,{nz+1,ny,nx,nens},{"zp1","y","x","nens"});
 
       auto density_dry  = dm.get_collapsed<real>("density_dry"              );
       auto uvel         = dm.get_collapsed<real>("uvel"                     );
@@ -310,15 +300,29 @@ namespace pam {
       auto zint         = dm.get_collapsed<real>("vertical_interface_height");
       auto dz           = dm.get_collapsed<real>("vertical_cell_dz"         );
       auto zmid         = dm.get_collapsed<real>("vertical_midpoint_height" );
-      auto hy_params    = dm.get_collapsed<real>("hydrostasis_parameters"   );
       auto gcm_rho_d    = dm.get_collapsed<real>("gcm_density_dry"          );
       auto gcm_uvel     = dm.get_collapsed<real>("gcm_uvel"                 );
       auto gcm_vvel     = dm.get_collapsed<real>("gcm_vvel"                 );
       auto gcm_wvel     = dm.get_collapsed<real>("gcm_wvel"                 );
       auto gcm_temp     = dm.get_collapsed<real>("gcm_temp"                 );
       auto gcm_rho_v    = dm.get_collapsed<real>("gcm_water_vapor"          );
-
-      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<1>(nz*ny*nx*nens) , YAKL_LAMBDA (int i) {
+      auto gcm_rho_l    = dm.get_collapsed<real>("gcm_cloud_water"          );
+      auto gcm_rho_i    = dm.get_collapsed<real>("gcm_cloud_ice"            );
+      auto gcm_num_liq  = dm.get_collapsed<real>("gcm_num_liq"              );
+      auto gcm_num_ice  = dm.get_collapsed<real>("gcm_num_ice"              );
+      auto gcm_num_rain = dm.get_collapsed<real>("gcm_num_rain"             );
+      auto ref_rho_d    = dm.get_collapsed<real>("ref_density_dry"          );
+      auto ref_rho_v    = dm.get_collapsed<real>("ref_density_vapor"        );
+      auto ref_rho_l    = dm.get_collapsed<real>("ref_density_liq"          );
+      auto ref_rho_i    = dm.get_collapsed<real>("ref_density_ice"          );
+      auto ref_temp     = dm.get_collapsed<real>("ref_temp"                 );
+      auto ref_pres     = dm.get_collapsed<real>("ref_pres"                 );
+      auto ref_presi    = dm.get_collapsed<real>("ref_presi"                 );
+      auto uvel_stag    = dm.get_collapsed<real>("uvel_stag"                 );
+      auto vvel_stag    = dm.get_collapsed<real>("vvel_stag"                 );
+      auto wvel_stag    = dm.get_collapsed<real>("wvel_stag"                 );
+
+      parallel_for( YAKL_AUTO_LABEL() , SimpleBounds<1>((nz+1)*(ny+1)*(nx+1)*nens) , YAKL_LAMBDA (int i) {
         if (i < density_dry.size()) density_dry(i) = 0;
         if (i < uvel       .size()) uvel       (i) = 0;
         if (i < vvel       .size()) vvel       (i) = 0;
@@ -333,78 +337,22 @@ namespace pam {
         if (i < gcm_wvel   .size()) gcm_wvel   (i) = 0;
         if (i < gcm_temp   .size()) gcm_temp   (i) = 0;
         if (i < gcm_rho_v  .size()) gcm_rho_v  (i) = 0;
-        if (i < hy_params  .size()) hy_params  (i) = 0;
-      });
-    }
-
-
-
-    void update_hydrostasis() {
-      using yakl::c::parallel_for;
-      using yakl::c::SimpleBounds;
-      using yakl::intrinsics::matmul_cr;
-      using yakl::intrinsics::matinv_ge;
-      using yakl::atomicAdd;
-
-      auto zint      = dm.get<real const,2>("vertical_interface_height");
-      auto zmid      = dm.get<real const,2>("vertical_midpoint_height" );
-      auto dz        = dm.get<real const,2>("vertical_cell_dz");
-      auto hy_params = dm.get<real,3>("hydrostasis_parameters"   );
-
-      auto dens_dry = dm.get<real const,4>("density_dry");
-      auto dens_wv  = dm.get<real const,4>("water_vapor");
-      auto temp     = dm.get<real const,4>("temp");
-
-      int nz   = get_nz();
-      int ny   = get_ny();
-      int nx   = get_nx();
-      int nens = get_nens();
-
-      auto R_d = get_option<real>("R_d");
-      auto R_v = get_option<real>("R_v");
-
-      // Compute average column of pressure for each ensemble
-      real2d pressure_col("pressure_col",nz,nens);
-      memset( pressure_col , 0._fp );
-      real r_nx_ny = 1._fp / (nx*ny);
-      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
-        real rho_d = dens_dry(k,j,i,iens);
-        real rho_v = dens_wv (k,j,i,iens);
-        real T     = temp    (k,j,i,iens);
-        atomicAdd( pressure_col(k,iens) , compute_pressure( rho_d , rho_v , T , R_d , R_v )*r_nx_ny );
+        if (i < gcm_rho_l  .size()) gcm_rho_l  (i) = 0;
+        if (i < gcm_rho_i  .size()) gcm_rho_i  (i) = 0;
+        if (i < gcm_num_liq .size()) gcm_num_liq (i) = 0;
+        if (i < gcm_num_ice .size()) gcm_num_ice (i) = 0;
+        if (i < gcm_num_rain.size()) gcm_num_rain(i) = 0;
+        if (i < ref_rho_d  .size()) ref_rho_d  (i) = 0;
+        if (i < ref_rho_v  .size()) ref_rho_v  (i) = 0;
+        if (i < ref_rho_l  .size()) ref_rho_l  (i) = 0;
+        if (i < ref_rho_i  .size()) ref_rho_i  (i) = 0;
+        if (i < ref_temp   .size()) ref_temp   (i) = 0;
+        if (i < ref_pres   .size()) ref_pres   (i) = 0;
+        if (i < ref_presi  .size()) ref_presi  (i) = 0;
+        if (i < uvel_stag  .size()) uvel_stag  (i) = 0;
+        if (i < vvel_stag  .size()) vvel_stag  (i) = 0;
+        if (i < wvel_stag  .size()) wvel_stag  (i) = 0;
       });
-
-      parallel_for( SimpleBounds<2>(nz,nens) , YAKL_LAMBDA (int k, int iens) {
-        int constexpr npts = 5;
-        int kmid = k;
-        int kbot = k-2;
-        int ktop = k+2;
-        while (kbot < 0   ) { kbot++; ktop++; }
-        while (ktop > nz-1) { kbot--; ktop--; }
-
-        SArray<double,1,npts> z;
-        real z0 = zmid(kmid,iens);
-        for (int i=0; i < npts; i++) { z(i) = ( zmid(kbot+i,iens) - z0 ) / dz(k,iens); }
-
-        SArray<double,2,npts,npts> vand;
-        for (int j=0; j < npts; j++) {
-          for (int i=0; i < npts; i++) {
-            vand(j,i) = pow( z(i) , (double) j );
-          }
-        }
-
-        auto vand_inv = matinv_ge( vand );
-
-        SArray<double,1,npts> logp;
-        for (int i=0; i < npts; i++) {
-          logp(i) = log(pressure_col(kbot+i,iens));
-        }
-
-        auto params = matmul_cr( vand_inv , logp );
-
-        for (int i=0; i < npts; i++) { hy_params(k,i,iens) = params(i); }
-      });
-
     }
 
 
@@ -439,8 +387,12 @@ namespace pam {
     }
 
 
-  };
 
-}
+    YAKL_INLINE static real compute_pressure( real rho_d, real rho_v, real T, real R_d, real R_v ) {
+      return rho_d*R_d*T + rho_v*R_v*T;
+    }
 
 
+  };
+
+}
diff --git a/physics/micro/none/Microphysics.h b/physics/micro/none/Microphysics.h
index ce419d7f..45110a6e 100644
--- a/physics/micro/none/Microphysics.h
+++ b/physics/micro/none/Microphysics.h
@@ -82,5 +82,6 @@ class Microphysics {
     return "none";
   }
 
-
+  void finalize(pam::PamCoupler &coupler) {
+  }
 };
diff --git a/physics/micro/p3/Microphysics.h b/physics/micro/p3/Microphysics.h
index 59d4ccbc..a4348198 100644
--- a/physics/micro/p3/Microphysics.h
+++ b/physics/micro/p3/Microphysics.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include "pam_coupler.h"
+// #include <stdio.h>
 
 #include "scream_cxx_interface_p3.h"
 
@@ -11,13 +12,13 @@ void p3_main_fortran(double *qc , double *nc , double *qr , double *nr , double
                      double *dz , double *nc_nuceat_tend , double *nccn_prescribed , double *ni_activated ,
                      double *inv_qc_relvar , int &it , double *precip_liq_surf , double *precip_ice_surf ,
                      int &its , int &ite , int &kts , int &kte , double *diag_eff_radius_qc ,
-                     double *diag_eff_radius_qi , double *rho_qi , bool &do_predict_nc , 
+                     double *diag_eff_radius_qi , double *rho_qi , bool &do_predict_nc ,
                      bool &do_prescribed_CCN ,double *dpres , double *exner , double *qv2qi_depos_tend ,
                      double *precip_total_tend , double *nevapr , double *qr_evap_tend ,
                      double *precip_liq_flux , double *precip_ice_flux , double *cld_frac_r ,
                      double *cld_frac_l , double *cld_frac_i , double *p3_tend_out , double *mu_c ,
-                     double *lamc , double *liq_ice_exchange , double *vap_liq_exchange , 
-                     double *vap_ice_exchange , double *qv_prev , double *t_prev , double *col_location ,
+                     double *lamc , double *liq_ice_exchange , double *vap_liq_exchange ,
+                     double *vap_ice_exchange , double *q_prev , double *t_prev , double *col_location ,
                      double *elapsed_s );
 
 
@@ -56,15 +57,15 @@ class Microphysics {
   real etime;
 
   // Indices for all of your tracer quantities
-  int static constexpr ID_C  = 0;  // Local index for Cloud Water Mass  
+  int static constexpr ID_C  = 0;  // Local index for Cloud Water Mass
   int static constexpr ID_NC = 1;  // Local index for Cloud Water Number
-  int static constexpr ID_R  = 2;  // Local index for Rain Water Mass   
-  int static constexpr ID_NR = 3;  // Local index for Rain Water Number 
-  int static constexpr ID_I  = 4;  // Local index for Ice Mass          
-  int static constexpr ID_M  = 5;  // Local index for Ice Number        
-  int static constexpr ID_NI = 6;  // Local index for Ice-Rime Mass     
-  int static constexpr ID_BM = 7;  // Local index for Ice-Rime Volume   
-  int static constexpr ID_V  = 8;  // Local index for Water Vapor       
+  int static constexpr ID_R  = 2;  // Local index for Rain Water Mass
+  int static constexpr ID_NR = 3;  // Local index for Rain Water Number
+  int static constexpr ID_I  = 4;  // Local index for Ice Mass
+  int static constexpr ID_M  = 5;  // Local index for Ice Number
+  int static constexpr ID_NI = 6;  // Local index for Ice-Rime Mass
+  int static constexpr ID_BM = 7;  // Local index for Ice-Rime Volume
+  int static constexpr ID_V  = 8;  // Local index for Water Vapor
 
 
 
@@ -118,12 +119,20 @@ class Microphysics {
 
     auto &dm = coupler.get_data_manager_device_readwrite();
 
-    dm.register_and_allocate<real>("qv_prev","qv from prev step"         ,{nz,ny,nx,nens},{"z","y","x","nens"});
+    dm.register_and_allocate<real>( "nccn_prescribed","prescribed cld nuclei concentration  [#/kg]",  {nz,ny,nx,nens},{"z","y","x","nens"});
+    dm.register_and_allocate<real>( "nc_nuceat_tend", "activated cld nuclei number tendency [#/kg/s]",{nz,ny,nx,nens},{"z","y","x","nens"});
+    dm.register_and_allocate<real>( "ni_activated",   "activated ice nuclei concentration   [#/kg]",  {nz,ny,nx,nens},{"z","y","x","nens"});
+
+    dm.register_and_allocate<real>("q_prev","rho_v from prev step"       ,{nz,ny,nx,nens},{"z","y","x","nens"});
     dm.register_and_allocate<real>("t_prev" ,"Temperature from prev step",{nz,ny,nx,nens},{"z","y","x","nens"});
 
     dm.register_and_allocate<real>("precip_liq_surf_out","liq surface precipitation rate",{ny,nx,nens},{"y","x","nens"});
     dm.register_and_allocate<real>("precip_ice_surf_out","ice surface precipitation rate",{ny,nx,nens},{"y","x","nens"});
 
+    dm.register_and_allocate<real>("liq_ice_exchange_out","p3 liq to ice phase change tendency",{nz,ny,nx,nens},{"z","y","x","nens"});
+    dm.register_and_allocate<real>("vap_liq_exchange_out","p3 vap to liq phase change tendency",{nz,ny,nx,nens},{"z","y","x","nens"});
+    dm.register_and_allocate<real>("vap_ice_exchange_out","p3 vap to ice phase change tendency",{nz,ny,nx,nens},{"z","y","x","nens"});
+
     auto cloud_water     = dm.get<real,4>( "cloud_water"     );
     auto cloud_water_num = dm.get<real,4>( "cloud_water_num" );
     auto rain            = dm.get<real,4>( "rain"            );
@@ -133,7 +142,7 @@ class Microphysics {
     auto ice_rime        = dm.get<real,4>( "ice_rime"        );
     auto ice_rime_vol    = dm.get<real,4>( "ice_rime_vol"    );
     auto water_vapor     = dm.get<real,4>( "water_vapor"     );
-    auto qv_prev         = dm.get<real,4>( "qv_prev"         );
+    auto q_prev          = dm.get<real,4>( "q_prev"          );
     auto t_prev          = dm.get<real,4>( "t_prev"          );
 
     parallel_for( "micro zero" , SimpleBounds<4>(nz,ny,nx,nens) ,
@@ -147,7 +156,7 @@ class Microphysics {
       ice_rime       (k,j,i,iens) = 0;
       ice_rime_vol   (k,j,i,iens) = 0;
       water_vapor    (k,j,i,iens) = 0;
-      qv_prev        (k,j,i,iens) = 0;
+      q_prev         (k,j,i,iens) = 0;
       t_prev         (k,j,i,iens) = 0;
     });
 
@@ -159,22 +168,22 @@ class Microphysics {
     real tmelt  = 273.15;
     real pi     = 3.14159265;
     int  iulog  = 1;
-    bool masterproc = true;
     #ifndef P3_CXX
+      bool masterproc = true;
       micro_p3_utils_init_fortran( cp_d , R_d , R_v , rhoh2o , mwh2o , mwdry ,
                                    grav , latvap , latice, cp_l , tmelt , pi , iulog , masterproc );
     #endif
 
-    std::string dir;
-    if (coupler.option_exists("p3_lookup_data_path")) {
-      dir = coupler.get_option<std::string>("p3_lookup_data_path");
-    } else {
-      dir = "../../../physics/micro/p3/tables"; // default for PAM standalone
-    };
-    std::string ver = "4.1.1";
-    int dir_len = dir.length();
-    int ver_len = ver.length();
     #ifndef P3_CXX
+      std::string dir;
+      if (coupler.option_exists("p3_lookup_data_path")) {
+        dir = coupler.get_option<std::string>("p3_lookup_data_path");
+      } else {
+        dir = "../../../physics/micro/p3/tables"; // default for PAM standalone
+      };
+      std::string ver = "4.1.1";
+      int dir_len = dir.length();
+      int ver_len = ver.length();
       p3_init_fortran( dir.c_str() , dir_len , ver.c_str() , ver_len );
     #endif
 
@@ -232,6 +241,10 @@ class Microphysics {
       }
     #endif
 
+    auto nccn_prescribed = dm.get_lev_col<real>("nccn_prescribed");
+    auto nc_nuceat_tend  = dm.get_lev_col<real>("nc_nuceat_tend");
+    auto ni_activated    = dm.get_lev_col<real>("ni_activated");
+
     // Get tracers dimensioned as (nz,ny*nx*nens)
     auto rho_c  = dm.get_lev_col<real>("cloud_water"    );
     auto rho_nc = dm.get_lev_col<real>("cloud_water_num");
@@ -247,7 +260,7 @@ class Microphysics {
     auto rho_dry = dm.get_lev_col<real>("density_dry");
     auto temp    = dm.get_lev_col<real>("temp"       );
 
-    // Calculate the grid spacing
+    // Set grid spacing
     real2d dz("dz",nz,ny*nx*nens);
     parallel_for( "micro dz" , SimpleBounds<4>(nz,ny,nx,nens) ,
                   YAKL_LAMBDA (int k, int j, int i, int iens) {
@@ -255,11 +268,10 @@ class Microphysics {
     });
 
     // Get everything from the DataManager that's not a tracer but is persistent across multiple micro calls
-    auto qv_prev = dm.get_lev_col<real>("qv_prev");
-    auto t_prev  = dm.get_lev_col<real>("t_prev" );
+    auto q_prev = dm.get_lev_col<real>("q_prev");
+    auto t_prev = dm.get_lev_col<real>("t_prev" );
 
     // Allocates inputs and outputs
-    int p3_nout = 49;
     real2d qc                ( "qc"                 ,           nz   , ncol );
     real2d nc                ( "nc"                 ,           nz   , ncol );
     real2d qr                ( "qr"                 ,           nz   , ncol );
@@ -269,14 +281,11 @@ class Microphysics {
     real2d qm                ( "qm"                 ,           nz   , ncol );
     real2d bm                ( "bm"                 ,           nz   , ncol );
     real2d qv                ( "qv"                 ,           nz   , ncol );
-    real2d pressure          ( "pressure"           ,           nz   , ncol );
+    real2d pressure_dry      ( "pressure_dry"       ,           nz   , ncol );
     real2d theta             ( "theta"              ,           nz   , ncol );
     real2d exner             ( "exner"              ,           nz   , ncol );
     real2d inv_exner         ( "inv_exner"          ,           nz   , ncol );
-    real2d dpres             ( "dpres"              ,           nz   , ncol );
-    real2d nc_nuceat_tend    ( "nc_nuceat_tend"     ,           nz   , ncol );
-    real2d nccn_prescribed   ( "nccn_prescribed"    ,           nz   , ncol );
-    real2d ni_activated      ( "ni_activated"       ,           nz   , ncol );
+    real2d dpres_dry         ( "dpres_dry"          ,           nz   , ncol );
     real2d cld_frac_i        ( "cld_frac_i"         ,           nz   , ncol );
     real2d cld_frac_l        ( "cld_frac_l"         ,           nz   , ncol );
     real2d cld_frac_r        ( "cld_frac_r"         ,           nz   , ncol );
@@ -298,7 +307,10 @@ class Microphysics {
     real2d liq_ice_exchange  ( "liq_ice_exchange"   ,           nz   , ncol );
     real2d vap_liq_exchange  ( "vap_liq_exchange"   ,           nz   , ncol );
     real2d vap_ice_exchange  ( "vap_ice_exchange"   ,           nz   , ncol );
+    #ifndef P3_CXX
+    int p3_nout = 49;
     real3d p3_tend_out       ( "p3_tend_out"        , p3_nout , nz   , ncol );
+    #endif
 
     //////////////////////////////////////////////////////////////////////////////
     // Compute quantities needed for inputs to P3
@@ -306,6 +318,7 @@ class Microphysics {
     // Force constants into local scope
     real R_d     = this->R_d;
     real R_v     = this->R_v;
+    real cv_d    = this->cv_d;
     real cp_d    = this->cp_d;
     real cp_v    = this->cp_v;
     real cp_l    = this->cp_l;
@@ -317,74 +330,86 @@ class Microphysics {
 
     // Save initial state, and compute inputs for p3(...)
     parallel_for( "micro adjust preprocess" , SimpleBounds<2>(nz,ncol) , YAKL_LAMBDA (int k, int i) {
-      // Compute total density
-      real rho = rho_dry(k,i) + rho_c(k,i) + rho_r(k,i) + rho_i(k,i) + rho_v(k,i);
 
-      // P3 doesn't do saturation adjustment, so we need to do that ahead of time
-      // If we're using SHOC, then it does saturation adjustment, so no need to do it here
       if (! sgs_shoc) {
-        compute_adjusted_state(rho, rho_dry(k,i) , rho_v(k,i) , rho_c(k,i) , temp(k,i),
-                               R_v , cp_d , cp_v , cp_l);
+        // If not using SHOC, then do a saturation adjustment here
+        real rho = rho_dry(k,i) + rho_v(k,i);
+        compute_adjusted_state(rho, rho_dry(k,i), rho_v(k,i), rho_c(k,i), temp(k,i), R_v, cp_d, cp_v, cp_l );
       }
 
       // Compute quantities for P3
-      qc       (k,i) = rho_c (k,i) / rho_dry(k,i);
-      nc       (k,i) = rho_nc(k,i) / rho_dry(k,i);
-      qr       (k,i) = rho_r (k,i) / rho_dry(k,i);
-      nr       (k,i) = rho_nr(k,i) / rho_dry(k,i);
-      qi       (k,i) = rho_i (k,i) / rho_dry(k,i);
-      ni       (k,i) = rho_ni(k,i) / rho_dry(k,i);
-      qm       (k,i) = rho_m (k,i) / rho_dry(k,i);
-      bm       (k,i) = rho_bm(k,i) / rho_dry(k,i);
-      qv       (k,i) = rho_v (k,i) / rho_dry(k,i);
-      pressure (k,i) = R_d * rho_dry(k,i) * temp(k,i) + R_v * rho_v(k,i) * temp(k,i);
-      exner    (k,i) = pow( pressure(k,i) / p0 , R_d / cp_d );
-      inv_exner(k,i) = 1. / exner(k,i);
-      theta    (k,i) = temp(k,i) / exner(k,i);
+      qc          (k,i) = rho_c (k,i) / rho_dry(k,i);
+      nc          (k,i) = rho_nc(k,i) / rho_dry(k,i);
+      qr          (k,i) = rho_r (k,i) / rho_dry(k,i);
+      nr          (k,i) = rho_nr(k,i) / rho_dry(k,i);
+      qi          (k,i) = rho_i (k,i) / rho_dry(k,i);
+      ni          (k,i) = rho_ni(k,i) / rho_dry(k,i);
+      qm          (k,i) = rho_m (k,i) / rho_dry(k,i);
+      bm          (k,i) = rho_bm(k,i) / rho_dry(k,i);
+      qv          (k,i) = rho_v (k,i) / rho_dry(k,i);
+
+      real pressure     = R_d*rho_dry(k,i)*temp(k,i) + R_v*rho_v(k,i)*temp(k,i);
+
+      pressure_dry(k,i) = R_d*rho_dry(k,i)*temp(k,i);
+
+      //These are constant kappa expressions
+      exner       (k,i) = pow( pressure / p0 , R_d / cp_d );
+      // exner       (k,i) = pow( pressure_dry(k,i) / p0 , R_d / cp_d );
+      inv_exner   (k,i) = 1. / exner(k,i);
+      theta       (k,i) = temp(k,i) * inv_exner(k,i);
+
       // P3 uses dpres to calculate density via the hydrostatic assumption.
       // So we just reverse this to compute dpres to give true density
-      dpres(k,i) = rho_dry(k,i) * grav * dz(k,i);
-      // nc_nuceat_tend, nccn_prescribed, and ni_activated are not used
-      nc_nuceat_tend (k,i) = 0;
-      nccn_prescribed(k,i) = 0;
-      ni_activated   (k,i) = 0;
+      dpres_dry(k,i) = rho_dry(k,i) * grav * dz(k,i);
+      // dpres_dry(k,i) = (rho_dry(k,i)+rho_v(k,i)) * grav * dz(k,i);
+
       // col_location is for debugging only, and it will be ignored for now
       if (k < 3) { col_location(k,i) = 1; }
 
       if (first_step) {
-        qv_prev(k,i) = qv  (k,i);
-        t_prev (k,i) = temp(k,i);
+        q_prev(k,i) = qv  (k,i);
+        t_prev(k,i) = temp(k,i);
+      } else {
+        // q_prev is stored as density but converted to mixing ratio here
+        q_prev(k,i) = q_prev(k,i) / rho_dry(k,i);
       }
     });
 
     if (sgs_shoc) {
-      auto ast      = dm.get_lev_col<real>("cldfrac");
-      inv_qc_relvar = dm.get_lev_col<real>("relvar" );
-      get_cloud_fraction( ast , qc , qr , qi , cld_frac_i , cld_frac_l , cld_frac_r );
+      // inv_qc_relvar = dm.get_lev_col<real>("inv_qc_relvar");
+      // auto cld_frac = dm.get_lev_col<real>("cldfrac");
+      // get_cloud_fraction( cld_frac , qc , qr , qi , cld_frac_i , cld_frac_l , cld_frac_r );
+
+      // For unknown reasons the method above (i.e. cld_frac from SHOC) was producing a overly dry 
+      // state, and the method below gave much better results. Not sure how to fix this.
+      parallel_for( SimpleBounds<2>(nz,ncol) , YAKL_LAMBDA (int k, int i) {
+        cld_frac_l(k,i) = 1;
+        cld_frac_i(k,i) = 1;
+        cld_frac_r(k,i) = 1;
+        inv_qc_relvar(k,i) = 1;
+      });
     } else {
       parallel_for( SimpleBounds<2>(nz,ncol) , YAKL_LAMBDA (int k, int i) {
         // Assume cloud fracton is always 1
         cld_frac_l(k,i) = 1;
         cld_frac_i(k,i) = 1;
         cld_frac_r(k,i) = 1;
-        // inv_qc_relvar is always set to one
         inv_qc_relvar(k,i) = 1;
       });
     }
     double elapsed_s;
-    int its, ite, kts, kte;
-    int it = 1;
-    bool do_predict_nc = false;
-    bool do_prescribed_CCN = false;
-
-
-    its = 1;
-    ite = ncol;
-    kts = 1;
-    kte = nz;
+    int it, its, ite, kts, kte;
+    bool do_predict_nc = true;
+    bool do_prescribed_CCN = true;
 
     #ifdef P3_CXX
 
+      it  = 0;
+      its = 0;
+      ite = ncol-1;
+      kts = 0;
+      kte = nz-1;
+
       // Create room for transposed variables (only 2-D variables need to be transposed)
       auto transposed_qc                 = qc                .createDeviceCopy().reshape(qc                .extent(1),qc                .extent(0)); // inout
       auto transposed_nc                 = nc                .createDeviceCopy().reshape(nc                .extent(1),nc                .extent(0)); // inout
@@ -396,18 +421,18 @@ class Microphysics {
       auto transposed_qm                 = qm                .createDeviceCopy().reshape(qm                .extent(1),qm                .extent(0)); // inout
       auto transposed_ni                 = ni                .createDeviceCopy().reshape(ni                .extent(1),ni                .extent(0)); // inout
       auto transposed_bm                 = bm                .createDeviceCopy().reshape(bm                .extent(1),bm                .extent(0)); // inout
-      auto transposed_pressure           = pressure          .createDeviceCopy().reshape(pressure          .extent(1),pressure          .extent(0)); // in
+      auto transposed_pressure_dry       = pressure_dry      .createDeviceCopy().reshape(pressure_dry      .extent(1),pressure_dry      .extent(0)); // in
       auto transposed_dz                 = dz                .createDeviceCopy().reshape(dz                .extent(1),dz                .extent(0)); // in
       auto transposed_nc_nuceat_tend     = nc_nuceat_tend    .createDeviceCopy().reshape(nc_nuceat_tend    .extent(1),nc_nuceat_tend    .extent(0)); // in
       auto transposed_nccn_prescribed    = nccn_prescribed   .createDeviceCopy().reshape(nccn_prescribed   .extent(1),nccn_prescribed   .extent(0)); // in
       auto transposed_ni_activated       = ni_activated      .createDeviceCopy().reshape(ni_activated      .extent(1),ni_activated      .extent(0)); // in
       auto transposed_inv_qc_relvar      = inv_qc_relvar     .createDeviceCopy().reshape(inv_qc_relvar     .extent(1),inv_qc_relvar     .extent(0)); // in
-      auto transposed_dpres              = dpres             .createDeviceCopy().reshape(dpres             .extent(1),dpres             .extent(0)); // in
+      auto transposed_dpres_dry          = dpres_dry         .createDeviceCopy().reshape(dpres_dry         .extent(1),dpres_dry         .extent(0)); // in
       auto transposed_inv_exner          = inv_exner         .createDeviceCopy().reshape(inv_exner         .extent(1),inv_exner         .extent(0)); // in
       auto transposed_cld_frac_r         = cld_frac_r        .createDeviceCopy().reshape(cld_frac_r        .extent(1),cld_frac_r        .extent(0)); // in
       auto transposed_cld_frac_l         = cld_frac_l        .createDeviceCopy().reshape(cld_frac_l        .extent(1),cld_frac_l        .extent(0)); // in
       auto transposed_cld_frac_i         = cld_frac_i        .createDeviceCopy().reshape(cld_frac_i        .extent(1),cld_frac_i        .extent(0)); // in
-      auto transposed_qv_prev            = qv_prev           .createDeviceCopy().reshape(qv_prev           .extent(1),qv_prev           .extent(0)); // in
+      auto transposed_q_prev             = q_prev            .createDeviceCopy().reshape(q_prev            .extent(1),q_prev            .extent(0)); // in
       auto transposed_t_prev             = t_prev            .createDeviceCopy().reshape(t_prev            .extent(1),t_prev            .extent(0)); // in
       auto transposed_col_location       = col_location      .createDeviceCopy().reshape(col_location      .extent(1),col_location      .extent(0)); // in
       auto transposed_diag_eff_radius_qc = diag_eff_radius_qc.createDeviceCopy().reshape(diag_eff_radius_qc.extent(1),diag_eff_radius_qc.extent(0)); //   out
@@ -422,29 +447,30 @@ class Microphysics {
 
       // For in and inout variables, copy transposed data (One kernel for efficiency)
       parallel_for( SimpleBounds<2>(nz,ncol) , YAKL_LAMBDA (int k, int i) {
-        transposed_qc             (i,k) = qc             (k,i); // inout
-        transposed_nc             (i,k) = nc             (k,i); // inout
-        transposed_qr             (i,k) = qr             (k,i); // inout
-        transposed_nr             (i,k) = nr             (k,i); // inout
-        transposed_theta          (i,k) = theta          (k,i); // inout
-        transposed_qv             (i,k) = qv             (k,i); // inout
-        transposed_qi             (i,k) = qi             (k,i); // inout
-        transposed_qm             (i,k) = qm             (k,i); // inout
-        transposed_ni             (i,k) = ni             (k,i); // inout
-        transposed_bm             (i,k) = bm             (k,i); // inout
-        transposed_pressure       (i,k) = pressure       (k,i); // in
-        transposed_dz             (i,k) = dz             (k,i); // in
-        transposed_nc_nuceat_tend (i,k) = nc_nuceat_tend (k,i); // in
-        transposed_nccn_prescribed(i,k) = nccn_prescribed(k,i); // in
-        transposed_ni_activated   (i,k) = ni_activated   (k,i); // in
-        transposed_inv_qc_relvar  (i,k) = inv_qc_relvar  (k,i); // in
-        transposed_dpres          (i,k) = dpres          (k,i); // in
-        transposed_inv_exner      (i,k) = inv_exner      (k,i); // in
-        transposed_cld_frac_r     (i,k) = cld_frac_r     (k,i); // in
-        transposed_cld_frac_l     (i,k) = cld_frac_l     (k,i); // in
-        transposed_cld_frac_i     (i,k) = cld_frac_i     (k,i); // in
-        transposed_qv_prev        (i,k) = qv_prev        (k,i); // in
-        transposed_t_prev         (i,k) = t_prev         (k,i); // in
+        int k_p3 = nz-1-k;
+        transposed_qc             (i,k_p3) = qc             (k,i); // inout
+        transposed_nc             (i,k_p3) = nc             (k,i); // inout
+        transposed_qr             (i,k_p3) = qr             (k,i); // inout
+        transposed_nr             (i,k_p3) = nr             (k,i); // inout
+        transposed_theta          (i,k_p3) = theta          (k,i); // inout
+        transposed_qv             (i,k_p3) = qv             (k,i); // inout
+        transposed_qi             (i,k_p3) = qi             (k,i); // inout
+        transposed_qm             (i,k_p3) = qm             (k,i); // inout
+        transposed_ni             (i,k_p3) = ni             (k,i); // inout
+        transposed_bm             (i,k_p3) = bm             (k,i); // inout
+        transposed_pressure_dry   (i,k_p3) = pressure_dry   (k,i); // in
+        transposed_dz             (i,k_p3) = dz             (k,i); // in
+        transposed_nc_nuceat_tend (i,k_p3) = nc_nuceat_tend (k,i); // in
+        transposed_nccn_prescribed(i,k_p3) = nccn_prescribed(k,i); // in
+        transposed_ni_activated   (i,k_p3) = ni_activated   (k,i); // in
+        transposed_inv_qc_relvar  (i,k_p3) = inv_qc_relvar  (k,i); // in
+        transposed_dpres_dry      (i,k_p3) = dpres_dry      (k,i); // in
+        transposed_inv_exner      (i,k_p3) = inv_exner      (k,i); // in
+        transposed_cld_frac_r     (i,k_p3) = cld_frac_r     (k,i); // in
+        transposed_cld_frac_l     (i,k_p3) = cld_frac_l     (k,i); // in
+        transposed_cld_frac_i     (i,k_p3) = cld_frac_i     (k,i); // in
+        transposed_q_prev         (i,k_p3) = q_prev         (k,i); // in
+        transposed_t_prev         (i,k_p3) = t_prev         (k,i); // in
         if (k < 3) transposed_col_location(i,k) = col_location(k,i); // in
       });
 
@@ -460,7 +486,7 @@ class Microphysics {
                         transposed_qm                .create_ArrayIR() , // inout
                         transposed_ni                .create_ArrayIR() , // inout
                         transposed_bm                .create_ArrayIR() , // inout
-                        transposed_pressure          .create_ArrayIR() , // in
+                        transposed_pressure_dry      .create_ArrayIR() , // in
                         transposed_dz                .create_ArrayIR() , // in
                         transposed_nc_nuceat_tend    .create_ArrayIR() , // in
                         transposed_nccn_prescribed   .create_ArrayIR() , // in
@@ -478,7 +504,7 @@ class Microphysics {
                         transposed_bulk_qi           .create_ArrayIR() , //   out
                         do_predict_nc                                  , // in
                         do_prescribed_CCN                              , // in
-                        transposed_dpres             .create_ArrayIR() , // in
+                        transposed_dpres_dry         .create_ArrayIR() , // in
                         transposed_inv_exner         .create_ArrayIR() , // in
                         transposed_qv2qi_depos_tend  .create_ArrayIR() , //   out
                         transposed_precip_liq_flux   .create_ArrayIR() , //   out
@@ -489,38 +515,46 @@ class Microphysics {
                         transposed_liq_ice_exchange  .create_ArrayIR() , //   out
                         transposed_vap_liq_exchange  .create_ArrayIR() , //   out
                         transposed_vap_ice_exchange  .create_ArrayIR() , //   out
-                        transposed_qv_prev           .create_ArrayIR() , // in
+                        transposed_q_prev            .create_ArrayIR() , // in
                         transposed_t_prev            .create_ArrayIR() , // in
                         transposed_col_location      .create_ArrayIR() , // in
                        &elapsed_s                                      );//   out {
 
       // For inout and out variables, copy transposed data (One kernel for efficiency)
       parallel_for( SimpleBounds<2>(nz+1,ncol) , YAKL_LAMBDA (int k, int i) {
-        precip_liq_flux   (k,i) = transposed_precip_liq_flux   (i,k); //   out
-        precip_ice_flux   (k,i) = transposed_precip_ice_flux   (i,k); //   out
+        int k_p3 = (nz+1)-1-k;
+        precip_liq_flux   (k,i) = transposed_precip_liq_flux   (i,k_p3); //   out
+        precip_ice_flux   (k,i) = transposed_precip_ice_flux   (i,k_p3); //   out
         if (k < nz) {
-          qc                (k,i) = transposed_qc                (i,k); // inout
-          nc                (k,i) = transposed_nc                (i,k); // inout
-          qr                (k,i) = transposed_qr                (i,k); // inout
-          nr                (k,i) = transposed_nr                (i,k); // inout
-          theta             (k,i) = transposed_theta             (i,k); // inout
-          qv                (k,i) = transposed_qv                (i,k); // inout
-          qi                (k,i) = transposed_qi                (i,k); // inout
-          qm                (k,i) = transposed_qm                (i,k); // inout
-          ni                (k,i) = transposed_ni                (i,k); // inout
-          bm                (k,i) = transposed_bm                (i,k); // inout
-          diag_eff_radius_qc(k,i) = transposed_diag_eff_radius_qc(i,k); //   out
-          diag_eff_radius_qi(k,i) = transposed_diag_eff_radius_qi(i,k); //   out
-          bulk_qi           (k,i) = transposed_bulk_qi           (i,k); //   out
-          qv2qi_depos_tend  (k,i) = transposed_qv2qi_depos_tend  (i,k); //   out
-          liq_ice_exchange  (k,i) = transposed_liq_ice_exchange  (i,k); //   out
-          vap_liq_exchange  (k,i) = transposed_vap_liq_exchange  (i,k); //   out
-          vap_ice_exchange  (k,i) = transposed_vap_ice_exchange  (i,k); //   out
+          k_p3 = nz-1-k;
+          qc                (k,i) = transposed_qc                (i,k_p3); // inout
+          nc                (k,i) = transposed_nc                (i,k_p3); // inout
+          qr                (k,i) = transposed_qr                (i,k_p3); // inout
+          nr                (k,i) = transposed_nr                (i,k_p3); // inout
+          theta             (k,i) = transposed_theta             (i,k_p3); // inout
+          qv                (k,i) = transposed_qv                (i,k_p3); // inout
+          qi                (k,i) = transposed_qi                (i,k_p3); // inout
+          qm                (k,i) = transposed_qm                (i,k_p3); // inout
+          ni                (k,i) = transposed_ni                (i,k_p3); // inout
+          bm                (k,i) = transposed_bm                (i,k_p3); // inout
+          diag_eff_radius_qc(k,i) = transposed_diag_eff_radius_qc(i,k_p3); //   out
+          diag_eff_radius_qi(k,i) = transposed_diag_eff_radius_qi(i,k_p3); //   out
+          bulk_qi           (k,i) = transposed_bulk_qi           (i,k_p3); //   out
+          qv2qi_depos_tend  (k,i) = transposed_qv2qi_depos_tend  (i,k_p3); //   out
+          liq_ice_exchange  (k,i) = transposed_liq_ice_exchange  (i,k_p3); //   out
+          vap_liq_exchange  (k,i) = transposed_vap_liq_exchange  (i,k_p3); //   out
+          vap_ice_exchange  (k,i) = transposed_vap_ice_exchange  (i,k_p3); //   out
         }
       });
 
     #else
 
+      it  = 1;
+      its = 1;
+      ite = ncol;
+      kts = 1;
+      kte = nz;
+
       auto qc_host                 = qc                .createHostCopy();
       auto nc_host                 = nc                .createHostCopy();
       auto qr_host                 = qr                .createHostCopy();
@@ -531,7 +565,7 @@ class Microphysics {
       auto qm_host                 = qm                .createHostCopy();
       auto ni_host                 = ni                .createHostCopy();
       auto bm_host                 = bm                .createHostCopy();
-      auto pressure_host           = pressure          .createHostCopy();
+      auto pressure_dry_host       = pressure_dry      .createHostCopy();
       auto dz_host                 = dz                .createHostCopy();
       auto nc_nuceat_tend_host     = nc_nuceat_tend    .createHostCopy();
       auto nccn_prescribed_host    = nccn_prescribed   .createHostCopy();
@@ -542,7 +576,7 @@ class Microphysics {
       auto diag_eff_radius_qc_host = diag_eff_radius_qc.createHostCopy();
       auto diag_eff_radius_qi_host = diag_eff_radius_qi.createHostCopy();
       auto bulk_qi_host            = bulk_qi           .createHostCopy();
-      auto dpres_host              = dpres             .createHostCopy();
+      auto dpres_dry_host          = dpres_dry         .createHostCopy();
       auto inv_exner_host          = inv_exner         .createHostCopy();
       auto qv2qi_depos_tend_host   = qv2qi_depos_tend  .createHostCopy();
       auto precip_total_tend_host  = precip_total_tend .createHostCopy();
@@ -559,22 +593,22 @@ class Microphysics {
       auto liq_ice_exchange_host   = liq_ice_exchange  .createHostCopy();
       auto vap_liq_exchange_host   = vap_liq_exchange  .createHostCopy();
       auto vap_ice_exchange_host   = vap_ice_exchange  .createHostCopy();
-      auto qv_prev_host            = qv_prev           .createHostCopy();
+      auto q_prev_host             = q_prev            .createHostCopy();
       auto t_prev_host             = t_prev            .createHostCopy();
       auto col_location_host       = col_location      .createHostCopy();
 
       p3_main_fortran(qc_host.data() , nc_host.data() , qr_host.data() , nr_host.data() , theta_host.data() ,
                       qv_host.data() , dt , qi_host.data() , qm_host.data() , ni_host.data() , bm_host.data() ,
-                      pressure_host.data() , dz_host.data() , nc_nuceat_tend_host.data() ,
+                      pressure_dry_host.data() , dz_host.data() , nc_nuceat_tend_host.data() ,
                       nccn_prescribed_host.data() , ni_activated_host.data() , inv_qc_relvar_host.data() , it ,
                       precip_liq_surf_host.data() , precip_ice_surf_host.data() , its , ite , kts , kte ,
                       diag_eff_radius_qc_host.data() , diag_eff_radius_qi_host.data() , bulk_qi_host.data() ,
-                      do_predict_nc , do_prescribed_CCN , dpres_host.data() , inv_exner_host.data() ,
+                      do_predict_nc , do_prescribed_CCN , dpres_dry_host.data() , inv_exner_host.data() ,
                       qv2qi_depos_tend_host.data() , precip_total_tend_host.data() , nevapr_host.data() ,
                       qr_evap_tend_host.data() , precip_liq_flux_host.data() , precip_ice_flux_host.data() ,
                       cld_frac_r_host.data() , cld_frac_l_host.data() , cld_frac_i_host.data() ,
                       p3_tend_out_host.data() , mu_c_host.data() , lamc_host.data() , liq_ice_exchange_host.data() ,
-                      vap_liq_exchange_host.data() , vap_ice_exchange_host.data() , qv_prev_host.data() ,
+                      vap_liq_exchange_host.data() , vap_ice_exchange_host.data() , q_prev_host.data() ,
                       t_prev_host.data() , col_location_host.data() , &elapsed_s );
 
       qc_host                .deep_copy_to( qc                 );
@@ -587,7 +621,6 @@ class Microphysics {
       qm_host                .deep_copy_to( qm                 );
       ni_host                .deep_copy_to( ni                 );
       bm_host                .deep_copy_to( bm                 );
-      pressure_host          .deep_copy_to( pressure           );
       dz_host                .deep_copy_to( dz                 );
       nc_nuceat_tend_host    .deep_copy_to( nc_nuceat_tend     );
       nccn_prescribed_host   .deep_copy_to( nccn_prescribed    );
@@ -598,7 +631,7 @@ class Microphysics {
       diag_eff_radius_qc_host.deep_copy_to( diag_eff_radius_qc );
       diag_eff_radius_qi_host.deep_copy_to( diag_eff_radius_qi );
       bulk_qi_host           .deep_copy_to( bulk_qi            );
-      dpres_host             .deep_copy_to( dpres              );
+      dpres_dry_host         .deep_copy_to( dpres_dry          );
       inv_exner_host         .deep_copy_to( inv_exner          );
       qv2qi_depos_tend_host  .deep_copy_to( qv2qi_depos_tend   );
       precip_total_tend_host .deep_copy_to( precip_total_tend  );
@@ -615,16 +648,21 @@ class Microphysics {
       liq_ice_exchange_host  .deep_copy_to( liq_ice_exchange   );
       vap_liq_exchange_host  .deep_copy_to( vap_liq_exchange   );
       vap_ice_exchange_host  .deep_copy_to( vap_ice_exchange   );
-      qv_prev_host           .deep_copy_to( qv_prev            );
+      q_prev_host            .deep_copy_to( q_prev             );
       t_prev_host            .deep_copy_to( t_prev             );
       col_location_host      .deep_copy_to( col_location       );
     #endif
-    
-                    
-    ///////////////////////////////////////////////////////////////////////////////
-    // Convert P3 outputs into dynamics coupler state and tracer masses
-    ///////////////////////////////////////////////////////////////////////////////
+
+
+    ////////////////////////////////////////////////////////////////////////////
+    // P3 postprocessing
+    ////////////////////////////////////////////////////////////////////////////
+
+    auto liq_ice_exchange_out = dm.get_lev_col<real>("liq_ice_exchange_out");
+    auto vap_liq_exchange_out = dm.get_lev_col<real>("vap_liq_exchange_out");
+    auto vap_ice_exchange_out = dm.get_lev_col<real>("vap_ice_exchange_out");
     parallel_for( "micro post process" , SimpleBounds<2>(nz,ncol) , YAKL_LAMBDA (int k, int i) {
+      // Convert P3 outputs into dynamics coupler state and tracer masses
       rho_c  (k,i) = std::max( qc(k,i)*rho_dry(k,i) , 0._fp );
       rho_nc (k,i) = std::max( nc(k,i)*rho_dry(k,i) , 0._fp );
       rho_r  (k,i) = std::max( qr(k,i)*rho_dry(k,i) , 0._fp );
@@ -634,12 +672,23 @@ class Microphysics {
       rho_m  (k,i) = std::max( qm(k,i)*rho_dry(k,i) , 0._fp );
       rho_bm (k,i) = std::max( bm(k,i)*rho_dry(k,i) , 0._fp );
       rho_v  (k,i) = std::max( qv(k,i)*rho_dry(k,i) , 0._fp );
+
       // While micro changes total pressure, thus changing exner, the definition
-      // of theta depends on the old exner pressure, so we'll use old exner here
-      temp   (k,i) = theta(k,i) * exner(k,i);
+      // of theta depends on the old exner pressure, so we'll use old exner here.      
+      // Also, P3 calculations are done at constant pressure but PAM assumes 
+      // constant volume, so we need to scale temperature change by cv/cp
+      real temp_old = temp(k,i);
+      real temp_new = theta(k,i) * exner(k,i);
+      temp(k,i) = temp_old + ( temp_new - temp_old ) * cv_d/cp_d;
+
       // Save qv and temperature for the next call to p3_main
-      qv_prev(k,i) = std::max( qv(k,i) , 0._fp );
-      t_prev (k,i) = temp(k,i);
+      // NOTE - convert q_prev back to density to carry across time steps
+      q_prev(k,i) = std::max( qv(k,i)*rho_dry(k,i) , 0._fp );
+      t_prev(k,i) = temp(k,i);
+      // copy diagnostic quantities to data manager
+      liq_ice_exchange_out(k,i) = liq_ice_exchange(k,i);
+      vap_liq_exchange_out(k,i) = vap_liq_exchange(k,i);
+      vap_ice_exchange_out(k,i) = vap_ice_exchange(k,i);
     });
 
     // output precipitation rates to be aggregated
@@ -647,8 +696,8 @@ class Microphysics {
     auto precip_ice_surf_out = dm.get<real,3>( "precip_ice_surf_out" );
     parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int iens) {
       int icol = j*nx*nens + i*nens + iens;
-      precip_liq_surf_out(j,i,iens) = precip_liq_surf(icol)*1000.;
-      precip_ice_surf_out(j,i,iens) = precip_ice_surf(icol)*1000.;
+      precip_liq_surf_out(j,i,iens) = precip_liq_surf(icol);
+      precip_ice_surf_out(j,i,iens) = precip_ice_surf(icol);
     });
 
     #ifdef PAM_DEBUG
@@ -664,7 +713,7 @@ class Microphysics {
           int icol = j*nx*nens + i*nens + iens;
           mass4d(k,j,i,iens) = (rho_v(k,j,i,iens) + rho_c(k,j,i,iens) + rho_r(k,j,i,iens) + rho_i(k,j,i,iens)) *
                                crm_dx * crm_dy * (zint_in(k+1,iens) - zint_in(k,iens));
-          sfc_precip_mass3d(j,i,iens) = dt*crm_dx*crm_dy*( precip_liq_surf(icol)*1000. + precip_ice_surf(icol)*1000. );
+          sfc_precip_mass3d(j,i,iens) = dt*crm_dx*crm_dy*( precip_liq_surf(icol) + precip_ice_surf(icol) );
         });
         mass = yakl::intrinsics::sum(mass4d) + yakl::intrinsics::sum(sfc_precip_mass3d);
       }
@@ -788,35 +837,32 @@ class Microphysics {
 
 
 
-  void get_cloud_fraction( realConst2d ast , realConst2d qc , realConst2d qr , realConst2d qi ,
+  void get_cloud_fraction( realConst2d cld_frac_in , realConst2d qc , realConst2d qr , realConst2d qi ,
                            real2d const &cld_frac_i , real2d const &cld_frac_l , real2d const &cld_frac_r ) {
     using yakl::c::parallel_for;
     using yakl::c::SimpleBounds;
 
-    int nz   = ast.dimension[0];
-    int ncol = ast.dimension[1];
+    int nz   = cld_frac_in.dimension[0];
+    int ncol = cld_frac_in.dimension[1];
 
     real constexpr mincld = 0.0001;
-    real constexpr qsmall = 1.e-14;
 
     parallel_for( SimpleBounds<2>(nz,ncol) , YAKL_LAMBDA (int k, int i) {
-      cld_frac_i(k,i) = std::max(ast(k,i), mincld);
-      cld_frac_l(k,i) = std::max(ast(k,i), mincld);
-      cld_frac_r(k,i) = std::max(ast(k,i), mincld);
+      cld_frac_i(k,i) = std::max(cld_frac_in(k,i), mincld);
+      cld_frac_l(k,i) = std::max(cld_frac_in(k,i), mincld);
+      cld_frac_r(k,i) = std::max(cld_frac_in(k,i), mincld);
     });
 
-    // precipitation fraction 
+    // precipitation fraction
     // max overlap is the max cloud fraction in all layers above which are
-    // connected to this one by a continuous band of precip mass. If
+    // connected to a given layer by a continuous band of precip mass. If
     // there's no precip mass falling into a cell, it's precip frac is equal
     // to the cloud frac, which is probably ~zero.
-    // IF rain or ice mix ratios are smaller than threshold,
-    // then leave cld_frac_r as cloud fraction at current level
+    // Cycle through the layers from top to bottom and determine if the rain
+    // fraction needs to be updated to match cloud fraction in the layer above.
     parallel_for( ncol , YAKL_LAMBDA (int i) {
       for (int k=nz-2; k >= 0; k--) {
-        if ( qr(k+1,i) >= qsmall || qi(k+1,i) >= qsmall ) {
-          cld_frac_r(k,i) = std::max( cld_frac_r(k+1,i) , cld_frac_r(k,i) );
-        }
+        cld_frac_r(k,i) = std::max( cld_frac_in(k+1,i) , cld_frac_r(k,i) );
       }
     });
   }
@@ -831,6 +877,3 @@ class Microphysics {
 
 
 };
-
-
-
diff --git a/physics/scream_cxx_interfaces/CMakeLists.txt b/physics/scream_cxx_interfaces/CMakeLists.txt
index c1adf233..8b197e2e 100644
--- a/physics/scream_cxx_interfaces/CMakeLists.txt
+++ b/physics/scream_cxx_interfaces/CMakeLists.txt
@@ -16,7 +16,7 @@ set(EKAT_CONFIGURE_FILE_F90_FILE ON)
 set(Kokkos_ENABLE_SERIAL  ON CACHE BOOL "" FORCE)
 if ("${YAKL_ARCH}" STREQUAL "CUDA")
   set(EAMXX_ENABLE_GPU             ON CACHE BOOL "" FORCE)
-  set(Kokkos_ENABLE_CUDA_CONSTEXPR ON CACHE BOOL "" FORCE)
+#  set(Kokkos_ENABLE_CUDA_CONSTEXPR ON CACHE BOOL "" FORCE)
   set(Kokkos_ENABLE_CUDA_LAMBDA    ON CACHE BOOL "" FORCE)
   set(Kokkos_ENABLE_CUDA           ON CACHE BOOL "" FORCE)
 elseif ("${YAKL_ARCH}" STREQUAL "HIP")
@@ -38,7 +38,20 @@ set(SCREAM_POSSIBLY_NO_PACK_SIZE   FALSE       CACHE BOOL "" FORCE)
 set(SCREAM_PACK_SIZE               1           CACHE BOOL "" FORCE)
 set(SCREAM_SMALL_PACK_SIZE         1           CACHE BOOL "" FORCE)
 set(SCREAM_NUM_VERTICAL_LEV        ${PAM_NLEV} CACHE BOOL "" FORCE)
-set(SCREAM_DATA_DIR                ${CMAKE_CURRENT_SOURCE_DIR}/../micro/p3)
+if (PAM_STANDALONE)
+  set(SCREAM_DATA_DIR                ${CMAKE_CURRENT_SOURCE_DIR}/../micro/p3)
+else()
+  # MACH comes from CIME build system
+  execute_process(COMMAND ${SCREAM_HOME}/components/eamxx/scripts/query-cime ${MACH} DIN_LOC_ROOT
+    RESULT_VARIABLE QC_STATUS
+    OUTPUT_VARIABLE QC_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+  if (QC_STATUS EQUAL 0)
+    set(SCREAM_INPUT_ROOT ${QC_OUTPUT} CACHE PATH "" FORCE )
+  endif()
+  set (SCREAM_DATA_DIR ${SCREAM_INPUT_ROOT}/atm/scream CACHE PATH "" FORCE)
+endif()
 
 macro (EkatConfigFile CONFIG_FILE_IN CONFIG_FILE_C EKAT_CONFIGURE_FILE_F90_FILE)
   message(STATUS "DEBUG: ${CONFIG_FILE_IN} ; ${CONFIG_FILE_C} ; ${EKAT_CONFIGURE_FILE_F90_FILE}")
@@ -65,7 +78,9 @@ include_directories(${SCREAM_HOME}/components/eamxx/src/physics/share    )
 include_directories(${SCREAM_HOME}/components/eamxx/src                  )
 include_directories(${YAKL_HOME}/external                                )
 
-add_subdirectory(${SCREAM_HOME}/externals/ekat ${EKAT_BINARY_DIR})
+if (PAM_STANDALONE)
+  add_subdirectory(${SCREAM_HOME}/externals/ekat ${EKAT_BINARY_DIR})
+endif()
 add_library(pam_scream_cxx_interfaces ${MYSOURCES})
 target_compile_options(pam_scream_cxx_interfaces PUBLIC ${PAM_SCREAM_CXX_FLAGS})
 target_link_libraries(pam_scream_cxx_interfaces ekat "${PAM_SCREAM_CXX_LINK_FLAGS}")
diff --git a/physics/scream_cxx_interfaces/scream_cxx_interface_p3.cpp b/physics/scream_cxx_interfaces/scream_cxx_interface_p3.cpp
index a59dcb03..47143948 100644
--- a/physics/scream_cxx_interfaces/scream_cxx_interface_p3.cpp
+++ b/physics/scream_cxx_interfaces/scream_cxx_interface_p3.cpp
@@ -16,6 +16,15 @@ namespace pam {
 
   std::mutex p3_main_cxx_mutex;
 
+  void p3_init_lookup_tables() {
+    if (! Kokkos::is_initialized()) { Kokkos::initialize(); }
+    using namespace scream;
+    using namespace scream::p3;
+    using P3F = p3::Functions<Real, DefaultDevice>;
+    P3F::init_kokkos_ice_lookup_tables(ice_table_vals, collect_table_vals);
+    P3F::init_kokkos_tables(vn_table_vals, vm_table_vals, revap_table_vals, mu_r_table_vals, dnu_table_vals);
+  }
+
   void p3_main_cxx(array_ir::ArrayIR<double,2> const & qc,                 // inout
                    array_ir::ArrayIR<double,2> const & nc,                 // inout
                    array_ir::ArrayIR<double,2> const & qr,                 // inout
@@ -27,7 +36,7 @@ namespace pam {
                    array_ir::ArrayIR<double,2> const & qm,                 // inout
                    array_ir::ArrayIR<double,2> const & ni,                 // inout
                    array_ir::ArrayIR<double,2> const & bm,                 // inout
-                   array_ir::ArrayIR<double,2> const & pres,               // in
+                   array_ir::ArrayIR<double,2> const & pressure_dry,       // in
                    array_ir::ArrayIR<double,2> const & dz,                 // in
                    array_ir::ArrayIR<double,2> const & nc_nuceat_tend,     // in
                    array_ir::ArrayIR<double,2> const & nccn_prescribed,    // in
@@ -74,6 +83,9 @@ namespace pam {
     const int ncol  = ite-its+1;
     const int npack = ekat::npack<Spack>(nlev);
 
+    //--------------------------------------------------------------------------
+    // Prognostic State Variables
+
     auto qv_d = ArrayIR_to_View_of_Packs(qv);
     auto qc_d = ArrayIR_to_View_of_Packs(qc);
     auto nc_d = ArrayIR_to_View_of_Packs(nc);
@@ -85,15 +97,27 @@ namespace pam {
     auto bm_d = ArrayIR_to_View_of_Packs(bm);
     auto th_d = ArrayIR_to_View_of_Packs(th_atm);
 
-    P3F::P3PrognosticState prog_state{qc_d, nc_d, qr_d, nr_d, qi_d, qm_d,
-                                      ni_d, bm_d, qv_d, th_d};
+    P3F::P3PrognosticState   prog_state;
+    prog_state.qc     = qc_d;
+    prog_state.nc     = nc_d;
+    prog_state.qr     = qr_d;
+    prog_state.nr     = nr_d;
+    prog_state.qi     = qi_d;
+    prog_state.qm     = qm_d;
+    prog_state.ni     = ni_d;
+    prog_state.bm     = bm_d;
+    prog_state.th     = th_d;
+    prog_state.qv     = qv_d;
+
+    //--------------------------------------------------------------------------
+    // Diagnostic Inputs
 
     auto nc_nuceat_tend_d = ArrayIR_to_View_of_Packs(nc_nuceat_tend );
     auto nccn_d           = ArrayIR_to_View_of_Packs(nccn_prescribed);
     auto ni_activated_d   = ArrayIR_to_View_of_Packs(ni_activated   );
     auto inv_qc_relvar_d  = ArrayIR_to_View_of_Packs(inv_qc_relvar  );
     auto dz_d             = ArrayIR_to_View_of_Packs(dz             );
-    auto pres_d           = ArrayIR_to_View_of_Packs(pres           );
+    auto pressure_dry_d   = ArrayIR_to_View_of_Packs(pressure_dry   );
     auto dpres_d          = ArrayIR_to_View_of_Packs(dpres          );
     auto inv_exner_d      = ArrayIR_to_View_of_Packs(inv_exner      );
     auto t_prev_d         = ArrayIR_to_View_of_Packs(t_prev         );
@@ -102,11 +126,23 @@ namespace pam {
     auto cld_frac_l_d     = ArrayIR_to_View_of_Packs(cld_frac_l     );
     auto cld_frac_r_d     = ArrayIR_to_View_of_Packs(cld_frac_r     );
 
-    P3F::P3DiagnosticInputs diag_inputs{nc_nuceat_tend_d, nccn_d, 
-                                        ni_activated_d, inv_qc_relvar_d, 
-                                        cld_frac_i_d, cld_frac_l_d, cld_frac_r_d, 
-                                        pres_d, dz_d, dpres_d, inv_exner_d, 
-                                        q_prev_d, t_prev_d};
+    P3F::P3DiagnosticInputs diag_inputs;
+    diag_inputs.nc_nuceat_tend  = nc_nuceat_tend_d;
+    diag_inputs.nccn            = nccn_d;
+    diag_inputs.ni_activated    = ni_activated_d;
+    diag_inputs.inv_qc_relvar   = inv_qc_relvar_d;
+    diag_inputs.pres            = pressure_dry_d;
+    diag_inputs.dpres           = dpres_d;
+    diag_inputs.qv_prev         = q_prev_d;
+    diag_inputs.t_prev          = t_prev_d;
+    diag_inputs.cld_frac_l      = cld_frac_l_d;
+    diag_inputs.cld_frac_i      = cld_frac_i_d;
+    diag_inputs.cld_frac_r      = cld_frac_r_d;
+    diag_inputs.dz              = dz_d;
+    diag_inputs.inv_exner       = inv_exner_d;
+
+    //--------------------------------------------------------------------------
+    // Diagnostic Outputs
 
     auto qv2qi_depos_tend_d   = ArrayIR_to_View_of_Packs(qv2qi_depos_tend  );
     auto diag_eff_radius_qc_d = ArrayIR_to_View_of_Packs(diag_eff_radius_qc);
@@ -131,14 +167,34 @@ namespace pam {
       }
     });
 
-    P3F::P3DiagnosticOutputs diag_outputs {qv2qi_depos_tend_d, precip_liq_surf_d,
-                                           precip_ice_surf_d, diag_eff_radius_qc_d, diag_eff_radius_qi_d,
-                                           rho_qi_d,precip_liq_flux_d, precip_ice_flux_d};
+    P3F::P3DiagnosticOutputs diag_outputs;
+    diag_outputs.diag_eff_radius_qc = diag_eff_radius_qc_d;
+    diag_outputs.diag_eff_radius_qi = diag_eff_radius_qi_d;
+    diag_outputs.precip_liq_surf    = precip_liq_surf_d;
+    diag_outputs.precip_ice_surf    = precip_ice_surf_d;
+    diag_outputs.qv2qi_depos_tend   = qv2qi_depos_tend_d;
+    diag_outputs.rho_qi             = rho_qi_d;
+    diag_outputs.precip_liq_flux    = precip_liq_flux_d;
+    diag_outputs.precip_ice_flux    = precip_ice_flux_d;
+
+    //--------------------------------------------------------------------------
+    // Infrastructure
 
     auto col_location_d = ArrayIR_to_View(col_location);
 
-    P3F::P3Infrastructure infrastructure{dt, it, 0, ite-its, 0, kte-kts,
-                                         do_predict_nc, do_prescribed_CCN, col_location_d};
+    P3F::P3Infrastructure infrastructure;
+    infrastructure.col_location  = col_location_d;
+    infrastructure.dt            = dt;
+    infrastructure.it            = it;
+    infrastructure.its           = its;
+    infrastructure.ite           = ite;
+    infrastructure.kts           = kts;
+    infrastructure.kte           = kte;
+    infrastructure.predictNc     = do_predict_nc;
+    infrastructure.prescribedCCN = do_prescribed_CCN;
+
+    //--------------------------------------------------------------------------
+    // History
 
     auto liq_ice_exchange_d = ArrayIR_to_View_of_Packs(liq_ice_exchange);
     auto vap_liq_exchange_d = ArrayIR_to_View_of_Packs(vap_liq_exchange);
@@ -151,17 +207,24 @@ namespace pam {
        vap_ice_exchange_d(icol,ilev) = 0.;
     });
 
-    P3F::P3HistoryOnly history_only {liq_ice_exchange_d, vap_liq_exchange_d, vap_ice_exchange_d};
+    P3F::P3HistoryOnly history_only;
+    history_only.liq_ice_exchange = liq_ice_exchange_d;
+    history_only.vap_liq_exchange = vap_liq_exchange_d;
+    history_only.vap_ice_exchange = vap_ice_exchange_d;
 
-    p3_main_cxx_mutex.lock();
+    //--------------------------------------------------------------------------
 
-    if (! ice_table_vals.is_allocated()) {
-      P3F::init_kokkos_ice_lookup_tables(ice_table_vals, collect_table_vals);
-      P3F::init_kokkos_tables(vn_table_vals, vm_table_vals, revap_table_vals, mu_r_table_vals, dnu_table_vals);
-    }
+    p3_main_cxx_mutex.lock();
 
-    P3F::P3LookupTables tables{mu_r_table_vals, vn_table_vals, vm_table_vals, revap_table_vals,
-                               ice_table_vals, collect_table_vals, dnu_table_vals};
+    // Load P3 lookup table data
+    P3F::P3LookupTables lookup_tables;
+    lookup_tables.mu_r_table_vals    = mu_r_table_vals;
+    lookup_tables.vn_table_vals      = vn_table_vals;
+    lookup_tables.vm_table_vals      = vm_table_vals;
+    lookup_tables.revap_table_vals   = revap_table_vals;
+    lookup_tables.ice_table_vals     = ice_table_vals;
+    lookup_tables.collect_table_vals = collect_table_vals;
+    lookup_tables.dnu_table_vals     = dnu_table_vals;
 
     p3_main_cxx_mutex.unlock();
 
@@ -170,7 +233,7 @@ namespace pam {
     ekat::WorkspaceManager<Spack, KT::Device> workspace_mgr(nlev_pack, 59, policy);
 
     auto elapsed_time = P3F::p3_main(prog_state, diag_inputs, diag_outputs, infrastructure,
-                                     history_only, tables, workspace_mgr, ncol, nlev);
+                                     history_only, lookup_tables, workspace_mgr, ncol, nlev);
   }
 
 }
diff --git a/physics/scream_cxx_interfaces/scream_cxx_interface_p3.h b/physics/scream_cxx_interfaces/scream_cxx_interface_p3.h
index 87abbe5b..aca1eb39 100644
--- a/physics/scream_cxx_interfaces/scream_cxx_interface_p3.h
+++ b/physics/scream_cxx_interfaces/scream_cxx_interface_p3.h
@@ -5,6 +5,8 @@
 
 namespace pam {
 
+  void p3_init_lookup_tables();
+
   void p3_main_cxx(array_ir::ArrayIR<double,2> const & qc,                 // inout
                    array_ir::ArrayIR<double,2> const & nc,                 // inout
                    array_ir::ArrayIR<double,2> const & qr,                 // inout
diff --git a/physics/scream_cxx_interfaces/scream_cxx_interface_shoc.cpp b/physics/scream_cxx_interfaces/scream_cxx_interface_shoc.cpp
index 403b8dd3..44e20492 100644
--- a/physics/scream_cxx_interfaces/scream_cxx_interface_shoc.cpp
+++ b/physics/scream_cxx_interfaces/scream_cxx_interface_shoc.cpp
@@ -67,6 +67,9 @@ namespace pam {
     const int npack  = ekat::npack<Spack>(nlev);
     const int nipack = ekat::npack<Spack>(nlevi);
 
+    //--------------------------------------------------------------------------
+    // Input Variables
+
     auto host_dx_1d     = ArrayIR_to_View         (host_dx    );
     auto host_dy_1d     = ArrayIR_to_View         (host_dy    );
     auto wthl_sfc_1d    = ArrayIR_to_View         (wthl_sfc   );
@@ -84,10 +87,26 @@ namespace pam {
     auto wtracer_sfc_2d = ArrayIR_to_View_of_Packs(wtracer_sfc);
     auto inv_exner_2d   = ArrayIR_to_View_of_Packs(inv_exner  );
 
-    SHOC::SHOCInput shoc_input{host_dx_1d, host_dy_1d, zt_grid_2d, zi_grid_2d,
-                               pres_2d, presi_2d, pdel_2d, thv_2d,
-                               w_field_2d, wthl_sfc_1d, wqw_sfc_1d, uw_sfc_1d,
-                               vw_sfc_1d, wtracer_sfc_2d, inv_exner_2d, phis_1d};
+    SHOC::SHOCInput shoc_input;
+    shoc_input.dx          = host_dx_1d;
+    shoc_input.dy          = host_dy_1d;
+    shoc_input.zt_grid     = zt_grid_2d;
+    shoc_input.zi_grid     = zi_grid_2d;
+    shoc_input.pres        = pres_2d;
+    shoc_input.presi       = presi_2d;
+    shoc_input.pdel        = pdel_2d;
+    shoc_input.thv         = thv_2d;
+    shoc_input.w_field     = w_field_2d;     // wm_zt;
+    shoc_input.wthl_sfc    = wthl_sfc_1d;    // wpthlp_sfc;
+    shoc_input.wqw_sfc     = wqw_sfc_1d;     // wprtp_sfc;
+    shoc_input.uw_sfc      = uw_sfc_1d;      // upwp_sfc;
+    shoc_input.vw_sfc      = vw_sfc_1d;      // vpwp_sfc;
+    shoc_input.wtracer_sfc = wtracer_sfc_2d;
+    shoc_input.inv_exner   = inv_exner_2d;
+    shoc_input.phis        = phis_1d;
+
+    //--------------------------------------------------------------------------
+    // Input/Output Variables
 
     auto host_dse_2d     = ArrayIR_to_View_of_Packs(host_dse    );
     auto tke_2d          = ArrayIR_to_View_of_Packs(tke         );
@@ -100,14 +119,30 @@ namespace pam {
     auto shoc_hwind_3d   = ArrayIR_to_View_of_Packs(shoc_hwind  );
     auto qtracers_3d     = ArrayIR_to_View_of_Packs(qtracers    );
 
-    SHOC::SHOCInputOutput shoc_input_output{host_dse_2d, tke_2d, thetal_2d, qw_2d,
-                                           shoc_hwind_3d, wthv_sec_2d, qtracers_3d,
-                                           tk_2d, shoc_cldfrac_2d, shoc_ql_2d};
+    SHOC::SHOCInputOutput shoc_input_output;
+    shoc_input_output.host_dse     = host_dse_2d;
+    shoc_input_output.tke          = tke_2d;
+    shoc_input_output.thetal       = thetal_2d;
+    shoc_input_output.qw           = qw_2d;
+    shoc_input_output.horiz_wind   = shoc_hwind_3d;
+    shoc_input_output.wthv_sec     = wthv_sec_2d;
+    shoc_input_output.qtracers     = qtracers_3d;
+    shoc_input_output.tk           = tk_2d;
+    shoc_input_output.shoc_cldfrac = shoc_cldfrac_2d;
+    shoc_input_output.shoc_ql      = shoc_ql_2d;
+
+    //--------------------------------------------------------------------------
+    // Output Variables
 
     auto pblh_1d     = ArrayIR_to_View         (pblh    );
     auto shoc_ql2_2d = ArrayIR_to_View_of_Packs(shoc_ql2);
 
-    SHOC::SHOCOutput shoc_output{pblh_1d, shoc_ql2_2d};
+    SHOC::SHOCOutput shoc_output;
+    shoc_output.pblh     = pblh_1d;
+    shoc_output.shoc_ql2 = shoc_ql2_2d;
+
+    //--------------------------------------------------------------------------
+    // Diagnostic Output
 
     auto shoc_mix_2d  = ArrayIR_to_View_of_Packs(shoc_mix );
     auto w_sec_2d     = ArrayIR_to_View_of_Packs(w_sec    );
@@ -124,9 +159,23 @@ namespace pam {
     auto brunt_2d     = ArrayIR_to_View_of_Packs(brunt    );
     auto isotropy_2d  = ArrayIR_to_View_of_Packs(isotropy );
 
-    SHOC::SHOCHistoryOutput shoc_history_output{shoc_mix_2d, w_sec_2d, thl_sec_2d, qw_sec_2d,
-                                                qwthl_sec_2d, wthl_sec_2d, wqw_sec_2d, wtke_sec_2d,
-                                                uw_sec_2d, vw_sec_2d, w3_2d, wqls_sec_2d, brunt_2d, isotropy_2d};
+    SHOC::SHOCHistoryOutput shoc_history_output;
+    shoc_history_output.shoc_mix  = shoc_mix_2d;
+    shoc_history_output.isotropy  = isotropy_2d;
+    shoc_history_output.w_sec     = w_sec_2d;
+    shoc_history_output.thl_sec   = thl_sec_2d;
+    shoc_history_output.qw_sec    = qw_sec_2d;
+    shoc_history_output.qwthl_sec = qwthl_sec_2d;
+    shoc_history_output.wthl_sec  = wthl_sec_2d;
+    shoc_history_output.wqw_sec   = wqw_sec_2d;
+    shoc_history_output.wtke_sec  = wtke_sec_2d;
+    shoc_history_output.uw_sec    = uw_sec_2d;
+    shoc_history_output.vw_sec    = vw_sec_2d;
+    shoc_history_output.w3        = w3_2d;
+    shoc_history_output.wqls_sec  = wqls_sec_2d;
+    shoc_history_output.brunt     = brunt_2d;
+
+    //--------------------------------------------------------------------------
 
     const int nwind = ekat::npack<Spack>(2)*Spack::n;
     const int ntrac = ekat::npack<Spack>(num_qtracers+3)*Spack::n;
diff --git a/physics/scream_cxx_p3_shoc/CMakeLists.txt b/physics/scream_cxx_p3_shoc/CMakeLists.txt
index d2730d40..e95a3955 100644
--- a/physics/scream_cxx_p3_shoc/CMakeLists.txt
+++ b/physics/scream_cxx_p3_shoc/CMakeLists.txt
@@ -4,7 +4,6 @@ project(scream_cxx_interfaces CXX Fortran C)
 set(MYSOURCES scream_cxx_interface_p3.cpp
               scream_cxx_interface_shoc.cpp)
 
-
 set(SCREAM_LIB_ONLY ON CACHE BOOL "" FORCE)
 set(CMAKE_MODULE_PATH ${SCREAM_HOME}/components/eamxx/cmake;${SCREAM_HOME}/externals/ekat/cmake)
 set(SCREAM_CIME_BUILD ON)
@@ -16,7 +15,7 @@ set(EKAT_CONFIGURE_FILE_F90_FILE ON)
 set(Kokkos_ENABLE_SERIAL  ON CACHE BOOL "" FORCE)
 if ("${YAKL_ARCH}" STREQUAL "CUDA")
   set(EAMXX_ENABLE_GPU             ON CACHE BOOL "" FORCE)
-  set(Kokkos_ENABLE_CUDA_CONSTEXPR ON CACHE BOOL "" FORCE)
+#  set(Kokkos_ENABLE_CUDA_CONSTEXPR ON CACHE BOOL "" FORCE)
   set(Kokkos_ENABLE_CUDA_LAMBDA    ON CACHE BOOL "" FORCE)
   set(Kokkos_ENABLE_CUDA           ON CACHE BOOL "" FORCE)
 elseif ("${YAKL_ARCH}" STREQUAL "HIP")
@@ -29,16 +28,23 @@ elseif ("${YAKL_ARCH}" STREQUAL "HIP")
 endif()
 
 if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Flang")
-  set(CMAKE_Fortran_FLAGS "-Mpreprocess")
+  set(CMAKE_Fortran_FLAGS -Mpreprocess)
+elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
+   set(CMAKE_Fortran_FLAGS -g )
+else()
+   set(CMAKE_Fortran_FLAGS -ffree-line-length-none )
 endif()
 
 add_definitions(-DSCREAM_CIME_BUILD -DHAVE_MPI -DSCREAM_CONFIG_IS_CMAKE)
 
+set(SCREAM_DOUBLE_PRECISION        TRUE        CACHE BOOL "" FORCE)
 set(SCREAM_POSSIBLY_NO_PACK_SIZE   FALSE       CACHE BOOL "" FORCE)
 set(SCREAM_PACK_SIZE               1           CACHE BOOL "" FORCE)
 set(SCREAM_SMALL_PACK_SIZE         1           CACHE BOOL "" FORCE)
 set(SCREAM_NUM_VERTICAL_LEV        ${PAM_NLEV} CACHE BOOL "" FORCE)
-set(SCREAM_DATA_DIR                ${CMAKE_CURRENT_SOURCE_DIR}/../micro/p3)
+if (PAM_STANDALONE)
+  set(SCREAM_DATA_DIR                ${CMAKE_CURRENT_SOURCE_DIR}/../micro/p3)
+endif()
 
 macro (EkatConfigFile CONFIG_FILE_IN CONFIG_FILE_C EKAT_CONFIGURE_FILE_F90_FILE)
   message(STATUS "DEBUG: ${CONFIG_FILE_IN} ; ${CONFIG_FILE_C} ; ${EKAT_CONFIGURE_FILE_F90_FILE}")
@@ -57,6 +63,10 @@ EkatConfigFile(${SCREAM_HOME}/components/eamxx/src/scream_config.h.in
                ${SCREAM_HOME}/components/eamxx/src/scream_config.h
                ${SCREAM_HOME}/components/eamxx/src/scream_config.f)
 
+if (${USE_KOKKOS})
+   set(Kokkos_ROOT ${INSTALL_SHAREDPATH}/kokkos/build)
+endif()
+
 include_directories(${SCREAM_HOME}/components/eamxx/src/physics/shoc     )
 include_directories(${SCREAM_HOME}/components/eamxx/src/physics/shoc/impl)
 include_directories(${SCREAM_HOME}/components/eamxx/src/physics/p3       )
diff --git a/physics/sgs/shoc/SGS.h b/physics/sgs/shoc/SGS.h
index 6200fc54..fb715ae9 100644
--- a/physics/sgs/shoc/SGS.h
+++ b/physics/sgs/shoc/SGS.h
@@ -105,22 +105,26 @@ class SGS {
     auto &dm = coupler.get_data_manager_device_readwrite();
 
     // Register and allocation non-tracer quantities used by the microphysics
-    dm.register_and_allocate<real>( "wthv_sec" , "Buoyancy flux [K m/s]"                , {nz,ny,nx,nens} , {"z","y","x","nens"} );
-    dm.register_and_allocate<real>( "tk"       , "Eddy coefficient for momentum [m2/s]" , {nz,ny,nx,nens} , {"z","y","x","nens"} );
-    dm.register_and_allocate<real>( "tkh"      , "Eddy coefficent for heat [m2/s]"      , {nz,ny,nx,nens} , {"z","y","x","nens"} );
-    dm.register_and_allocate<real>( "cldfrac"  , "Cloud fraction [-]"                   , {nz,ny,nx,nens} , {"z","y","x","nens"} );
-    dm.register_and_allocate<real>( "relvar"   , "Relative cloud water variance"        , {nz,ny,nx,nens} , {"z","y","x","nens"} );
+    dm.register_and_allocate<real>( "wthv_sec"     , "Buoyancy flux [K m/s]"                , {nz,ny,nx,nens} , {"z","y","x","nens"} );
+    dm.register_and_allocate<real>( "tk"           , "Eddy coefficient for momentum [m2/s]" , {nz,ny,nx,nens} , {"z","y","x","nens"} );
+    dm.register_and_allocate<real>( "tkh"          , "Eddy coefficent for heat [m2/s]"      , {nz,ny,nx,nens} , {"z","y","x","nens"} );
+    dm.register_and_allocate<real>( "cldfrac"      , "Cloud fraction [-]"                   , {nz,ny,nx,nens} , {"z","y","x","nens"} );
+    dm.register_and_allocate<real>( "inv_qc_relvar", "Inverse relative cloud water variance", {nz,ny,nx,nens} , {"z","y","x","nens"} );
+
+    // Store surface fluxes from GCM in data manager to facilitate more realistic turbulence calculations
+    dm.register_and_allocate<real>("sfc_shf", "input surface sensible heat flux"            , {ny,nx,nens}, {"y","x","nens"} );
+    dm.register_and_allocate<real>("sfc_lhf", "input surface latent heat flux"              , {ny,nx,nens}, {"y","x","nens"} );
 
     // Store surface momentum fluxes in data manager to facilitate internal surface calculations
-    dm.register_and_allocate<real>( "sfc_mom_flx_u", "Surface flux of U-momentum"       , {ny,nx,nens} , {"y","x","nens"} );
-    dm.register_and_allocate<real>( "sfc_mom_flx_v", "Surface flux of V-momentum"       , {ny,nx,nens} , {"y","x","nens"} );
+    dm.register_and_allocate<real>( "sfc_mom_flx_u", "Surface flux of U-momentum"           , {ny,nx,nens} , {"y","x","nens"} );
+    dm.register_and_allocate<real>( "sfc_mom_flx_v", "Surface flux of V-momentum"           , {ny,nx,nens} , {"y","x","nens"} );
 
     auto tke           = dm.get<real,4>( "tke"           );
     auto wthv_sec      = dm.get<real,4>( "wthv_sec"      );
     auto tk            = dm.get<real,4>( "tk"            );
     auto tkh           = dm.get<real,4>( "tkh"           );
     auto cldfrac       = dm.get<real,4>( "cldfrac"       );
-    auto relvar        = dm.get<real,4>( "relvar"        );
+    auto inv_qc_relvar = dm.get<real,4>( "inv_qc_relvar" );
     auto sfc_mom_flx_u = dm.get<real,3>( "sfc_mom_flx_u" );
     auto sfc_mom_flx_v = dm.get<real,3>( "sfc_mom_flx_v" );
 
@@ -130,7 +134,7 @@ class SGS {
       tk           (k,j,i,iens) = 0;
       tkh          (k,j,i,iens) = 0;
       cldfrac      (k,j,i,iens) = 0;
-      relvar       (k,j,i,iens) = 0;
+      inv_qc_relvar(k,j,i,iens) = 0;
     });
 
     parallel_for( "surface momentum flux zero" , SimpleBounds<3>(ny,nx,nens) , YAKL_LAMBDA (int j, int i, int iens) {
@@ -158,29 +162,25 @@ class SGS {
 
     auto &dm = coupler.get_data_manager_device_readwrite();
 
-    auto pres_mid_tmp = coupler.compute_pressure_array();
-
-    auto pres_mid = pres_mid_tmp.reshape<2>({nz  ,ncol});
-
     auto zint_pam = dm.get<real,2>("vertical_interface_height");
     auto zmid_pam = dm.get<real,2>("vertical_midpoint_height" );
 
     real crm_dx = coupler.get_xlen() / nx;
     real crm_dy = ny == 1 ? crm_dx : coupler.get_ylen() / ny;
 
-    // SHOC init requires reference pressure, which we do not have available for the init() call
     if (first_step) {
-      // Invert the first column in x, z, and ensemble to use as reference pressure for shoc
-      real1d pref_shoc("pref_shoc",nz);
-      parallel_for( SimpleBounds<1>(nz) , YAKL_LAMBDA (int k) {
-        pref_shoc(k) = pres_mid(nz-1-k,0);
-      });
-      real zvir = R_v / R_d - 1.;
-      int kbot, ktop;
-
-      kbot = nz;
-      ktop = 1;
       #ifndef SHOC_CXX
+        auto ref_pres = dm_device.get<real,2>("ref_pres");
+        // SHOC init requires reference pressure, which we do not have available for the init() call
+        // Invert the first column in x, z, and ensemble to use as reference pressure for shoc
+        real1d pref_shoc("pref_shoc",nz);
+        parallel_for( SimpleBounds<1>(nz) , YAKL_LAMBDA (int k) {
+          int k_shoc = nz-1-k;
+          pref_shoc(k_shoc) = ref_pres(k,0);
+        });
+        real zvir = R_v / R_d - 1.;
+        int kbot = nz;
+        int ktop = 1;
         shoc_init_fortran( nz , grav , R_d , R_v , cp_d , zvir , latvap , latice , karman ,
                            pref_shoc.createHostCopy().data() , kbot , ktop );
       #endif
@@ -213,14 +213,16 @@ class SGS {
     #endif
 
     // Get saved SHOC-related variables
-    auto tke           = dm.get_lev_col<real>( "tke"     ); // PAM Tracer                 ; don't compute
-    auto wthv_sec      = dm.get_lev_col<real>( "wthv_sec"); // Reuse from last SHOC output; don't compute
-    auto tk            = dm.get_lev_col<real>( "tk"      ); // Reuse from last SHOC output; don't compute
-    auto tkh           = dm.get_lev_col<real>( "tkh"     ); // Reuse from last SHOC output; don't compute
-    auto cldfrac       = dm.get_lev_col<real>( "cldfrac" ); // Reuse from last SHOC output; don't compute
-    auto relvar        = dm.get_lev_col<real>( "relvar"  ); // Computed on output for P3
+    auto tke           = dm.get_lev_col<real>(   "tke"           ); // PAM Tracer                 ; don't compute
+    auto wthv_sec      = dm.get_lev_col<real>(   "wthv_sec"      ); // Reuse from last SHOC output; don't compute
+    auto tk            = dm.get_lev_col<real>(   "tk"            ); // Reuse from last SHOC output; don't compute
+    auto tkh           = dm.get_lev_col<real>(   "tkh"           ); // Reuse from last SHOC output; don't compute
+    auto cldfrac       = dm.get_lev_col<real>(   "cldfrac"       ); // Reuse from last SHOC output; don't compute
+    auto inv_qc_relvar = dm.get_lev_col<real>(   "inv_qc_relvar" ); // Computed on output for P3
     auto sfc_mom_flx_u = dm.get_collapsed<real>( "sfc_mom_flx_u" ); // surface momentum flux - either zero or computed by surface_friction.h
     auto sfc_mom_flx_v = dm.get_collapsed<real>( "sfc_mom_flx_v" ); // surface momentum flux - either zero or computed by surface_friction.h
+    auto sfc_shf       = dm.get_collapsed<real>( "sfc_shf"       );
+    auto sfc_lhf       = dm.get_collapsed<real>( "sfc_lhf"       );
     // Get coupler state
     auto rho_d        = dm.get_lev_col<real>( "density_dry"  );
     auto uvel         = dm.get_lev_col<real>( "uvel"         );
@@ -229,11 +231,8 @@ class SGS {
     auto temp         = dm.get_lev_col<real>( "temp"         );
     auto rho_v        = dm.get_lev_col<real>( "water_vapor"  ); // Water vapor mass
 
-    // TODO: What do we do about rho_dry and wvel ???
 
-    // Grab cloud liquid tracer, and determine what other tracers to diffuse in SHOC
-    // TODO: If we add new MMF modules that add other tracers that need to be diffused, then this situation
-    //       must be handled
+    // Grab cloud liquid tracer, and other tracers that need to be modified by SHOC
     pam::MultiField<real,2> qtracers_pam;  // Extra tracers for SHOC to diffuse
     real2d rho_c;                     // Cloud liquid mass (name differs for different micro schemes)
     if        (micro_kessler) {
@@ -263,6 +262,9 @@ class SGS {
     auto zint = zint_tmp.reshape<2>({nz+1,ncol});
     auto zmid = zmid_tmp.reshape<2>({nz  ,ncol});
 
+    auto pmid_tmp = coupler.compute_pressure_array();
+    auto pmid = pmid_tmp.reshape<2>({nz  ,ncol});
+
     // Create variables for SHOC call (these are all inverted for vertical indices
     real1d shoc_host_dx    ("shoc_host_dx"    ,                  ncol); // grid spacing of host model in x direction [m]
     real1d shoc_host_dy    ("shoc_host_dy"    ,                  ncol); // grid spacing of host model in y direction [m]
@@ -314,6 +316,7 @@ class SGS {
     real grav   = this->grav  ;
     real R_d    = this->R_d   ;
     real R_v    = this->R_v   ;
+    real cv_d   = this->cv_d  ;
     real cp_d   = this->cp_d  ;
     real latvap = this->latvap;
 
@@ -324,6 +327,20 @@ class SGS {
         shoc_host_dy    (i) = crm_dy;
         shoc_wthl_sfc   (i) = 0;
         shoc_wqw_sfc    (i) = 0;
+
+        // // The alternative setting of surface fluxes below
+        // // must be used in conjunction with disabling the
+        // // application of surface fluxes in
+        // // components/eamxx/src/physics/shoc/impl/shoc_update_prognostics_implicit_impl.hpp
+        // // NOTE: density and exner values should represent the surface
+        // // interface rather than the center of the lowest level
+        // // SHF is in units of [W/m2] and needs to be converted to [K m/s]
+        // // LHF is in units of [W/m2] and needs to be converted to [m/s]
+        // real rho_sfc = rho_d(0,i)+rho_v(0,i);
+        // real exn_sfc = pow( pmid(0,i) / p0 , R_d / cp_d );
+        // shoc_wthl_sfc   (i) = sfc_shf(i) / ( cp_d * rho_sfc * exn_sfc);
+        // shoc_wqw_sfc    (i) = sfc_lhf(i) / ( latvap * rho_sfc );
+
         shoc_uw_sfc     (i) = sfc_mom_flx_u(i);
         shoc_vw_sfc     (i) = sfc_mom_flx_v(i);
         shoc_phis       (i) = zint(0,i) * grav;
@@ -333,55 +350,62 @@ class SGS {
       }
       if (k < nz) {
         int k_shoc = nz-1-k;
-        real z       = zmid    (k,i);
+
+        real rho_total = rho_d(k,i)+rho_v(k,i);
+
+        real z       = zmid(k,i);
         real dz      = zint(k+1,i) - zint(k,i);
-        real press   = pres_mid(k,i);
-        real t       = temp    (k,i);
-        real qv      = rho_v   (k,i) / rho_d(k,i);
-        real ql      = rho_c   (k,i) / rho_d(k,i);
-        real exner   = pow( press / p0 , R_d / cp_d );
+        real t       = temp(k,i);
+        real qv      = std::max(0._fp,rho_v(k,i)) / rho_total;
+        real ql      = std::max(0._fp,rho_c(k,i)) / rho_total;
+
+        //these are cosntant kappa expressions
+        real exner   = pow( pmid(k,i) / p0 , R_d / cp_d );
         real theta   = t / exner;
+
         // https://glossary.ametsoc.org/wiki/Virtual_potential_temperature
-        real theta_v = theta * (1 + 0.61_fp * qv);
+        real theta_v = theta * (1 + 0.61_fp * qv - ql);
         // https://glossary.ametsoc.org/wiki/Liquid_water_potential_temperature
-        // According to update_host_dse, the simplified version is used here
-        real theta_l = theta - (latvap/cp_d) * ql;
-        // dry static energy = Cp*T + g*z + phis
-        real dse     = cp_d * t + grav * z;
+        real theta_l = theta - (1/exner)*(latvap/cp_d) * ql;
+
         shoc_ql       (k_shoc,i) = ql;
         shoc_qw       (k_shoc,i) = qv + ql;
-        shoc_zt_grid  (k_shoc,i) = z;
-        shoc_pres     (k_shoc,i) = press;
-        shoc_pdel     (k_shoc,i) = grav * rho_d(k,i) * dz;
+        shoc_zt_grid  (k_shoc,i) = z - zint(0,i); // SHOC assumes that height starts at zero
+        shoc_pres     (k_shoc,i) = pmid(k,i);
+        shoc_pdel     (k_shoc,i) = grav * rho_total * dz;
         shoc_thv      (k_shoc,i) = theta_v;
         shoc_w_field  (k_shoc,i) = wvel(k,i);
         shoc_exner    (k_shoc,i) = exner;
         shoc_inv_exner(k_shoc,i) = 1._fp / exner;
-        shoc_host_dse (k_shoc,i) = dse;
-        // TKE is a tracer, so it's stored as mass-weighted. SHOC doesn't want mass-weighted
-        shoc_tke      (k_shoc,i) = tke(k,i) / rho_d(k,i);  
+        shoc_host_dse (k_shoc,i) = cp_d*t + grav*shoc_zt_grid(k_shoc,i) + shoc_phis(i);
         shoc_thetal   (k_shoc,i) = theta_l;
         shoc_u_wind   (k_shoc,i) = uvel(k,i);
         shoc_v_wind   (k_shoc,i) = vvel(k,i);
         shoc_wthv_sec (k_shoc,i) = wthv_sec(k,i);
+        // TKE is stored as a mass-weighted tracer, but SHOC doesn't want mass-weighted
+        // We also enfore a minimum of 0.004 taken from the SCREAM interface
+        shoc_tke      (k_shoc,i) = std::max( 0.004, tke(k,i)/rho_total );
+        shoc_tk       (k_shoc,i) = tk     (k,i);
+        shoc_tkh      (k_shoc,i) = tkh    (k,i);
+        shoc_cldfrac  (k_shoc,i) = cldfrac(k,i);
         for (int tr=0; tr < num_qtracers; tr++) {
-          shoc_qtracers(tr,k_shoc,i) = qtracers_pam(tr,k,i) / rho_d(k,i);
+          shoc_qtracers(tr,k_shoc,i) = qtracers_pam(tr,k,i) / rho_total;
+          shoc_qtracers(tr,k_shoc,i) = std::max( 0._fp, shoc_qtracers(tr,k_shoc,i) );
         }
-        shoc_tk     (k_shoc,i) = tk     (k,i);
-        shoc_tkh    (k_shoc,i) = tkh    (k,i);
-        shoc_cldfrac(k_shoc,i) = cldfrac(k,i);
       }
       int k_shoc = nz-k;
-      shoc_zi_grid(k_shoc,i) = zint    (k,i);
-      real pres_int;
-      if      (k == 0 ) {
-        pres_int = pres_mid(k  ,i) + grav*rho_d(k  ,i)*(zint(k+1,i)-zint(k  ,i))/2;
+      shoc_zi_grid(k_shoc,i) = zint(k,i) - zint(0,i); // SHOC needs height values to start at zero
+      // shoc_presi  (k_shoc,i) = pint(k,i);
+      real pint;
+      if (k == 0 ) {
+        pint = pmid(k  ,i) + grav*(rho_d(k  ,i)+rho_v(k  ,i))*(zint(k+1,i)-zint(k  ,i))/2;
       } else if (k == nz) {
-        pres_int = pres_mid(k-1,i) - grav*rho_d(k-1,i)*(zint(k  ,i)-zint(k-1,i))/2;
+        pint = pmid(k-1,i) - grav*(rho_d(k-1,i)+rho_v(k-1,i))*(zint(k  ,i)-zint(k-1,i))/2;
       } else {
-        pres_int = 0.5_fp * ( pres_mid(k-1,i) - grav*rho_d(k-1,i)*(zint(k  ,i)-zint(k-1,i))/2 +
-                              pres_mid(k  ,i) + grav*rho_d(k  ,i)*(zint(k+1,i)-zint(k  ,i))/2 ); }
-      shoc_presi  (k_shoc,i) = pres_int;
+        pint = 0.5_fp * ( pmid(k-1,i) - grav*(rho_d(k-1,i)+rho_v(k-1,i))*(zint(k  ,i)-zint(k-1,i))/2 +
+                          pmid(k  ,i) + grav*(rho_d(k  ,i)+rho_v(k  ,i))*(zint(k+1,i)-zint(k  ,i))/2 );
+      }
+      shoc_presi  (k_shoc,i) = pint;
     });
 
     int nadv = 1;
@@ -590,14 +614,14 @@ class SGS {
 
       int nzp1 = nz+1;
       // IMPORTANT: SHOC appears to actually want 1/exner for the exner parameter
-      shoc_main_fortran( ncol, nz, nzp1, dt, nadv, 
-                         shoc_host_dx_host.data(), shoc_host_dy_host.data(), shoc_thv_host.data(), 
+      shoc_main_fortran( ncol, nz, nzp1, dt, nadv,
+                         shoc_host_dx_host.data(), shoc_host_dy_host.data(), shoc_thv_host.data(),
                          shoc_zt_grid_host.data(), shoc_zi_grid_host.data(), shoc_pres_host.data(), shoc_presi_host.data(),
                          shoc_pdel_host.data(),
-                         shoc_wthl_sfc_host.data(), shoc_wqw_sfc_host.data(), shoc_uw_sfc_host.data(), shoc_vw_sfc_host.data(), 
-                         shoc_wtracer_sfc_host.data(), num_qtracers, shoc_w_field_host.data(), 
-                         shoc_inv_exner_host.data(), shoc_phis_host.data(), 
-                         shoc_host_dse_host.data(), shoc_tke_host.data(), shoc_thetal_host.data(), shoc_qw_host.data(), 
+                         shoc_wthl_sfc_host.data(), shoc_wqw_sfc_host.data(), shoc_uw_sfc_host.data(), shoc_vw_sfc_host.data(),
+                         shoc_wtracer_sfc_host.data(), num_qtracers, shoc_w_field_host.data(),
+                         shoc_inv_exner_host.data(), shoc_phis_host.data(),
+                         shoc_host_dse_host.data(), shoc_tke_host.data(), shoc_thetal_host.data(), shoc_qw_host.data(),
                          shoc_u_wind_host.data(), shoc_v_wind_host.data(), shoc_qtracers_host.data(),
                          shoc_wthv_sec_host.data(), shoc_tkh_host.data(), shoc_tk_host.data(),
                          shoc_ql_host.data(), shoc_cldfrac_host.data(),
@@ -656,31 +680,71 @@ class SGS {
 
     #endif
 
+    // yakl::fence();
+    // parallel_for( SimpleBounds<2>(5,5) , YAKL_LAMBDA (int k, int i) {  
+    //   int k_shoc = nz-1-k;
+    //   printf("WHDEBUG1 - k:%3.3d k_shoc:%3.3d nz:%3.3d i:%3.3d ncol:%5.5d \n",k,k_shoc,nz,i,ncol);
+    // });
+    // yakl::fence();
+    // parallel_for( SimpleBounds<2>(5,5) , YAKL_LAMBDA (int k, int i) {  
+    //   int k_shoc = nz-1-k;
+    //   real qw = shoc_qw(k_shoc,i);
+    //   real ql = shoc_ql(k_shoc,i);
+    //   real qv = qw - ql;
+    //   printf("WHDEBUG1 - k:%3.3d k_shoc:%3.3d nz:%3.3d i:%3.3d ncol:%5.5d \n",k,k_shoc,nz,i,ncol);
+    //   printf("WHDEBUG2 - shoc_thetal:%f \n",shoc_thetal(k_shoc,i));
+    //   printf("WHDEBUG3 - shoc_exner:%f \n",shoc_exner(k_shoc,i));
+    //   printf("WHDEBUG4 - shoc_ql:%f \n",shoc_ql(k_shoc,i));
+    //   printf("WHDEBUG5 - temp:%f \n",temp(k,i));
+    //   printf("WHDEBUG6 - cv_d:%f \n",cv_d);
+    //   printf("WHDEBUG7 - cp_d:%f \n",cp_d);
+    //   printf("WHDEBUG8 - latvap:%f \n",latvap);
+    //   real tmp_temp_old = temp(k,i);
+    //   real tmp_temp_new = shoc_thetal(k_shoc,i)*shoc_exner(k_shoc,i) + (latvap/cp_d)*shoc_ql(k_shoc,i);
+    //   // printf("WHDEBUG6 - temp_old:%f  temp_new:%f  cv_d:%f  cp_d:%f \n",temp_old,temp_new,cv_d,cp_d);
+    //   // temp(k,i) = tmp_temp_old + ( tmp_temp_new - tmp_temp_old ) * cv_d/cp_d;
+    //   temp(k,i) = temp(k,i) + ( (shoc_thetal(k_shoc,i)*shoc_exner(k_shoc,i) + (latvap/cp_d)*shoc_ql(k_shoc,i)) - temp(k,i) ) * cv_d/cp_d;
+    //   printf("WHDEBUG_END");
+    // });
+
     // Process outputs from SHOC (reordering the vertical dimension)
     parallel_for( SimpleBounds<2>(nz,ncol) , YAKL_LAMBDA (int k, int i) {
       int k_shoc = nz-1-k;
       real qw = shoc_qw(k_shoc,i);
       real ql = shoc_ql(k_shoc,i);
       real qv = qw - ql;
-      temp    (k,i) = (shoc_thetal(k_shoc,i) + (latvap/cp_d) * shoc_ql(k_shoc,i)) / shoc_inv_exner(k_shoc,i);
-      rho_v   (k,i) = qv * rho_d(k,i);
-      rho_c   (k,i) = ql * rho_d(k,i);
-      uvel    (k,i) = shoc_u_wind(k_shoc,i);
-      vvel    (k,i) = shoc_v_wind(k_shoc,i);
-      tke     (k,i) = shoc_tke     (k_shoc,i) * rho_d(k,i);
+
+      // invert liquid potential temperature to obtain updated temperature
+      // SHOC calculations are done at constant pressure but PAM assumes constant volume
+      // so we need to scale temperature change by cv/cp
+      real temp_old = temp(k,i);
+      real temp_new = shoc_thetal(k_shoc,i)*shoc_exner(k_shoc,i) + (latvap/cp_d)*shoc_ql(k_shoc,i);
+      temp(k,i) = temp_old + ( temp_new - temp_old ) * cv_d/cp_d;
+
+      rho_v(k,i) = qv * rho_d(k,i) / ( 1 - qv );
+      rho_v(k,i) = std::max( 0._fp, rho_v(k,i) );
+      real rho_total = rho_d(k,i)+rho_v(k,i);
+      rho_c   (k,i) = ql * rho_total;
+      rho_c(k,i) = std::max( 0._fp, rho_c(k,i) );
+      uvel    (k,i) = shoc_u_wind  (k_shoc,i);
+      vvel    (k,i) = shoc_v_wind  (k_shoc,i);
+      tke     (k,i) = shoc_tke     (k_shoc,i) * rho_total;
       wthv_sec(k,i) = shoc_wthv_sec(k_shoc,i);
       tk      (k,i) = shoc_tk      (k_shoc,i);
       tkh     (k,i) = shoc_tkh     (k_shoc,i);
-      cldfrac (k,i) = std::min(1._fp , shoc_cldfrac (k_shoc,i) );
+      cldfrac (k,i) = shoc_cldfrac (k_shoc,i);
+      cldfrac (k,i) = std::min( 1._fp, cldfrac(k,i) );
+      cldfrac (k,i) = std::max( 0._fp, cldfrac(k,i) );
       for (int tr=0; tr < num_qtracers; tr++) {
-        qtracers_pam(tr,k,i) = shoc_qtracers(tr,k_shoc,i) * rho_d(k,i);
+        qtracers_pam(tr,k,i) = shoc_qtracers(tr,k_shoc,i) * rho_total;
+        qtracers_pam(tr,k,i) = std::max( 0._fp, qtracers_pam(tr,k,i) );
       }
       real rcm  = shoc_ql (k_shoc,i);
       real rcm2 = shoc_ql2(k_shoc,i);
       if ( rcm != 0 && rcm2 != 0 ) {
-        relvar(k,i) = std::min( 10._fp , std::max( 0.001_fp , rcm*rcm / rcm2 ) );
+        inv_qc_relvar(k,i) = std::min( 10._fp , std::max( 0.001_fp , rcm*rcm / rcm2 ) );
       } else {
-        relvar(k,i) = 1;
+        inv_qc_relvar(k,i) = 1;
       }
     });
 
@@ -699,7 +763,8 @@ class SGS {
         });
         mass = yakl::intrinsics::sum(mass4d);
       }
-      std::cout << "Relative mass Change: " << abs(mass-mass0)/mass0 << " : ";
+      real rel_mass_change = abs(mass-mass0)/mass0;
+      if (rel_mass_change > 1.e-13) std::cout << "WARNING: SHOC relative mass change: " << rel_mass_change << std::endl;
     #endif
 
     first_step = false;
@@ -718,6 +783,3 @@ class SGS {
 
 
 };
-
-
-
diff --git a/physics/sgs/shoc/fortran/shoc.F90 b/physics/sgs/shoc/fortran/shoc.F90
index 790f74cd..e324f108 100644
--- a/physics/sgs/shoc/fortran/shoc.F90
+++ b/physics/sgs/shoc/fortran/shoc.F90
@@ -2358,12 +2358,12 @@ subroutine shoc_assumed_pdf(&
 
       ! Check to ensure Tl1_1 and Tl1_2 are not negative. endrun otherwise
       if (Tl1_1 .le. 0._rtype) then
-         write(err_msg,*)'ERROR: Tl1_1 is .le. 0 before shoc_assumed_pdf_compute_qs in shoc. Tl1_1 is:',Tl1_1
+         write(err_msg,*)'ERROR: Tl1_1<0 before shoc_assumed_pdf_compute_qs k:',k,' Tl1_1 is:',Tl1_1
          call endscreamrun(err_msg)
       endif
 
       if (Tl1_2 .le. 0._rtype) then
-         write(err_msg,*)'ERROR: Tl1_2 is .le. 0 before shoc_assumed_pdf_compute_qs in shoc. Tl1_2 is:',Tl1_2
+         write(err_msg,*)'ERROR: Tl1_2<0 before shoc_assumed_pdf_compute_qs k:',k,' Tl1_2 is:',Tl1_2
          call endscreamrun(err_msg)
       endif
 
diff --git a/standalone/idealized/CMakeLists.txt b/standalone/idealized/CMakeLists.txt
index 93e32256..41a9fec3 100644
--- a/standalone/idealized/CMakeLists.txt
+++ b/standalone/idealized/CMakeLists.txt
@@ -16,7 +16,6 @@ include_directories(${YAKL_BIN})
 set(YAML_BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../../externals/yaml-cpp ${CMAKE_CURRENT_BINARY_DIR}/yaml-cpp)
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../externals/eigen)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../externals/yaml-cpp/include)
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../../pam_core pam_core)
diff --git a/standalone/idealized/build/cmakescript_pamc.sh b/standalone/idealized/build/cmakescript_pamc.sh
index 1fdc3a4d..8b997752 100755
--- a/standalone/idealized/build/cmakescript_pamc.sh
+++ b/standalone/idealized/build/cmakescript_pamc.sh
@@ -26,3 +26,4 @@ cmake      \
   -DPAMC_THERMO=$ARG3                                             \
   -DPAMC_IO="serial"                                              \
   ..
+  
\ No newline at end of file
diff --git a/standalone/idealized/driver.cpp b/standalone/idealized/driver.cpp
index e508d61d..78782a99 100644
--- a/standalone/idealized/driver.cpp
+++ b/standalone/idealized/driver.cpp
@@ -41,7 +41,6 @@ int main(int argc, char** argv) {
     real        zlen         = config["zlen"    ].as<real>(-1.0_fp);
     real        dtphys_in    = config["dtphys"  ].as<real>();
     std::string vcoords_file = config["vcoords" ].as<std::string>();
-    bool        use_coupler_hydrostasis = config["use_coupler_hydrostasis"].as<bool>(false);
     auto out_freq                = config["out_freq"   ].as<real>(0);
     auto out_prefix              = config["out_prefix" ].as<std::string>("test");
     bool        inner_mpi = config["inner_mpi"].as<bool>(false);
@@ -127,9 +126,6 @@ int main(int argc, char** argv) {
     dycore.pre_time_loop(coupler);
     yakl::timer_stop("dycore");
 
-    // Now that we have an initial state, define hydrostasis for each ensemble member
-    if (use_coupler_hydrostasis) coupler.update_hydrostasis( );
-
     real etime = 0;
     // There are two ways of time control- setting total simulation time (simTime) or setting number of physics time steps (simSteps)
     if (simTime == 0.0) {  simTime = simSteps * dtphys_in; }
diff --git a/standalone/idealized/inputs/pamc_input_extruded_densitycurrent.yaml b/standalone/idealized/inputs/pamc_input_extruded_densitycurrent.yaml
index 683a56e9..79b3a583 100644
--- a/standalone/idealized/inputs/pamc_input_extruded_densitycurrent.yaml
+++ b/standalone/idealized/inputs/pamc_input_extruded_densitycurrent.yaml
@@ -1,6 +1,7 @@
 ---
 # Simulation time in number of physics time steps
-simSteps  :  450
+simSteps  :  900
+#450
 
 entropicvar_diffusion_coeff: 75
 velocity_diffusion_coeff: 75
@@ -18,8 +19,9 @@ vcoords  : '../pam-c/vcoords/vcoords_equal_33_6400.nc'
 initData : densitycurrent
 
 # Time steps
-dtphys: 2
+dtphys: 1.0
 crm_per_phys: 1
+# 2
 
 # Output filename
 out_prefix  : test
@@ -29,10 +31,11 @@ dycore_out_prefix  : test_dycore
 out_freq : 100.0
 
 # Output frequency in number of CRM time steps
-outSteps: 30
+outSteps: 100
 statSteps: 1
+#30
 
-tstype: si
+tstype: ssprk3
 
 # Number of ensembles
 nens : 1
diff --git a/standalone/idealized/inputs/pamc_input_extruded_risingbubble.yaml b/standalone/idealized/inputs/pamc_input_extruded_risingbubble.yaml
index 7d61b7ad..8ff3bd7c 100644
--- a/standalone/idealized/inputs/pamc_input_extruded_risingbubble.yaml
+++ b/standalone/idealized/inputs/pamc_input_extruded_risingbubble.yaml
@@ -38,8 +38,8 @@ statSteps: 1
 #6400 = 100s for explicit
 #200 = 100s for semi-implicit
 
-tstype: si
-#kgrk4 si
+tstype: ssprk3
+#kgrk4 si ssprk3
 
 # Number of ensembles
 nens : 1
diff --git a/standalone/idealized/inputs/pamc_input_extruded_twobubbles.yaml b/standalone/idealized/inputs/pamc_input_extruded_twobubbles.yaml
index aa4a6077..f8994741 100644
--- a/standalone/idealized/inputs/pamc_input_extruded_twobubbles.yaml
+++ b/standalone/idealized/inputs/pamc_input_extruded_twobubbles.yaml
@@ -16,22 +16,22 @@ initData : twobubbles
 
 # Time steps
 dtphys: 2.5
-crm_per_phys: 1
+crm_per_phys: 5
 #0.015625 * 5 = 0.078125
- 
+
 # Output filename
 out_prefix  : test
 dycore_out_prefix  : test_dycore
 
 #Coupler output frequency in actual time
-out_freq : 100.0
+out_freq : 50.0
 
 # Output frequency in number of CRM time steps
-outSteps: 12
+outSteps: 100
 statSteps: 1
 #6400
 
-tstype: si
+tstype: ssprk3
 
 # Number of ensembles
 nens : 1
@@ -40,4 +40,3 @@ nens : 1
 inner_mpi : true
 nprocx : 1
 nprocy : 1
-
diff --git a/standalone/idealized/pam-c/make_debug_script.sh b/standalone/idealized/pam-c/make_debug_script.sh
index b0eb2cbd..2fc51469 100755
--- a/standalone/idealized/pam-c/make_debug_script.sh
+++ b/standalone/idealized/pam-c/make_debug_script.sh
@@ -1,4 +1,4 @@
 cd ../build
 source ../../machines/linux_laptop_gnu_mpi_cpu_debug.env
-./cmakescript.sh ../pam-c/set_pamc_cmakevars.sh
+./cmakescript_pamc.sh ../pam-c/set_pamc_cmakevars.sh
 make -j $1
diff --git a/standalone/idealized/pam-c/make_script.sh b/standalone/idealized/pam-c/make_script.sh
index 85d0e91e..306b8166 100755
--- a/standalone/idealized/pam-c/make_script.sh
+++ b/standalone/idealized/pam-c/make_script.sh
@@ -1,4 +1,4 @@
 cd ../build
 source ../../machines/linux_laptop_gnu_mpi_cpu.env
-./cmakescript.sh ../pam-c/set_pamc_cmakevars.sh
+./cmakescript_pamc.sh ../pam-c/set_pamc_cmakevars.sh
 make -j $1
diff --git a/standalone/idealized/pam-c/plot_ACES2023_ce.py b/standalone/idealized/pam-c/plot_ACES2023_ce.py
new file mode 100644
index 00000000..640f39f7
--- /dev/null
+++ b/standalone/idealized/pam-c/plot_ACES2023_ce.py
@@ -0,0 +1,69 @@
+import xarray as xr
+import numpy as np
+import sys
+import matplotlib.pyplot as plt
+import matplotlib
+from matplotlib.patches import Circle
+
+
+matplotlib.rcParams.update({'font.size': 30})
+core = sys.argv[1]
+test = sys.argv[2]
+DS = xr.open_dataset(core+'-'+test+'/test_dycore0.nc')
+DS.load()
+
+
+
+# STATS PLOTTING
+
+energy = DS.energy
+pv = DS.pv
+mass = DS.mass
+densmax = DS.densmax
+densmin = DS.densmin
+
+plt.figure(figsize=(10,8))
+
+plt.plot( (energy.isel(energy_ndofs=0) - energy.isel(energy_ndofs=0)[0])/energy.isel(energy_ndofs=0)[0]*100.)
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in Energy')
+plt.tight_layout()
+plt.savefig('total_energy.png')
+
+
+plt.figure(figsize=(10,8))
+if core == 'CE':
+	plt.plot( (mass.isel(mass_ndofs=0) - mass.isel(mass_ndofs=0)[0])/mass.isel(mass_ndofs=0)[0]*100., label='rho')
+	plt.plot( (mass.isel(mass_ndofs=1) - mass.isel(mass_ndofs=1)[0])/mass.isel(mass_ndofs=1)[0]*100., label='S')
+elif core == 'AN':
+	plt.plot( (mass.isel(mass_ndofs=0) - mass.isel(mass_ndofs=0)[0])/mass.isel(mass_ndofs=0)[0]*100., label='S')
+plt.legend()
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in Densities')
+plt.tight_layout()
+plt.savefig('total_masses.png')
+
+# VARIABLE PLOTTING
+
+dens = DS.dens
+densl = DS.densl
+QXZl = DS.QXZl
+total_dens = DS.total_dens
+
+nt = DS.dims['t']
+Nlist = [0,nt-1]
+for n in Nlist:
+
+
+	plt.figure(figsize=(10,8))
+	if core == 'CE':
+		plt.contourf(dens.isel(t=n,dens_ndofs=1,dual_ncells_y=0,nens=0) / dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0))
+	if core == 'AN':
+		plt.contourf(dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0) / total_dens.isel(t=n,total_dens_ndofs=0,dual_ncells_y=0,nens=0))	
+	plt.colorbar()
+	plt.xlabel('x')
+	plt.ylabel('y')
+	plt.tight_layout()
+	plt.savefig('Sc' + str(n) + '.png',transparent=True)
+	plt.close('all')
+
diff --git a/standalone/idealized/pam-c/plot_ACES2023_mce.py b/standalone/idealized/pam-c/plot_ACES2023_mce.py
new file mode 100644
index 00000000..6fbddb12
--- /dev/null
+++ b/standalone/idealized/pam-c/plot_ACES2023_mce.py
@@ -0,0 +1,117 @@
+import xarray as xr
+import numpy as np
+import sys
+import matplotlib.pyplot as plt
+import matplotlib
+from matplotlib.patches import Circle
+
+
+matplotlib.rcParams.update({'font.size': 30})
+core = sys.argv[1]
+test = sys.argv[2]
+DS = xr.open_dataset(core+'-'+test+'/test_dycore0.nc')
+DS.load()
+
+
+# STATS PLOTTING
+
+energy = DS.energy
+pv = DS.pv
+mass = DS.mass
+densmax = DS.densmax
+densmin = DS.densmin
+
+plt.figure(figsize=(10,8))
+
+plt.plot( (energy.isel(energy_ndofs=0) - energy.isel(energy_ndofs=0)[0])/energy.isel(energy_ndofs=0)[0]*100.)
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in Energy')
+plt.tight_layout()
+plt.savefig('total_energy.png')
+
+masslabels = []
+massnames = []
+nmass = 50
+
+if core == 'MCE':
+	nmass = 3
+	masslabels = [r'$\rho$', 'S', r'$\rho_v$']
+	massnames = ['rho','S','rhov']
+if core == 'MAN':
+	nmass = 2
+	masslabels = ['S', r'$\rho_v$']
+	massnames = ['S','rhov']
+	
+for i,label,name in zip(range(nmass),masslabels,massnames):
+	plt.figure(figsize=(10,8))
+	plt.plot( (mass.isel(mass_ndofs=i) - mass.isel(mass_ndofs=i)[0])/mass.isel(mass_ndofs=i)[0]*100., label=label)
+	plt.legend()
+	plt.xlabel('Nsteps')
+	plt.ylabel('Fractional Change in '+label)
+	plt.tight_layout()
+	plt.savefig('total_'+name+'.png')	
+
+plt.figure(figsize=(10,8))
+if core == 'MCE':
+	plt.plot(densmin.isel(densmin_ndofs=2))
+if core == 'MAN':
+	plt.plot(densmin.isel(densmin_ndofs=1))
+plt.xlabel('Nsteps')
+plt.ylabel('Water Vapor Minima')
+plt.tight_layout()
+plt.savefig('vapor_minima.png')
+
+plt.figure(figsize=(10,8))
+if core == 'MCE':
+	plt.plot(densmax.isel(densmax_ndofs=2))
+if core == 'MAN':
+	plt.plot(densmax.isel(densmax_ndofs=1))
+plt.xlabel('Nsteps')
+plt.ylabel('Water Vapor Maxima')
+plt.tight_layout()
+plt.savefig('vapor_maxima.png')
+
+plt.figure(figsize=(10,8))
+plt.plot( (pv.isel(pv_ndofs=0) - pv.isel(pv_ndofs=0)[0])/pv.isel(pv_ndofs=0)[0]*100.)
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in PV')
+plt.tight_layout()
+plt.savefig('total_PV.png')
+
+# VARIABLE PLOTTING
+
+dens = DS.dens
+densl = DS.densl
+QXZl = DS.QXZl
+total_dens = DS.total_dens
+
+nt = DS.dims['t']
+Nlist = [0,nt-1]
+for n in Nlist:
+
+	plt.figure(figsize=(10,8))
+	if core == 'MCE':
+		plt.contourf(densl.isel(t=n,densl_ndofs=2,primal_ncells_y=0,nens=0))
+	if core == 'MAN':
+		plt.contourf(densl.isel(t=n,densl_ndofs=1,primal_ncells_y=0,nens=0))
+	plt.colorbar()
+	plt.xlabel('x')
+	plt.ylabel('y')
+	plt.tight_layout()
+	plt.savefig('rhovl' + str(n) + '.png',transparent=True)
+	plt.close('all')
+
+
+	plt.figure(figsize=(10,8))
+	if core == 'MCE':
+		plt.contourf(dens.isel(t=n,dens_ndofs=1,dual_ncells_y=0,nens=0) / dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0))
+	if core == 'MAN':
+		plt.contourf(dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0) / total_dens.isel(t=n,total_dens_ndofs=0,dual_ncells_y=0,nens=0))
+	plt.colorbar()
+	plt.xlabel('x')
+	plt.ylabel('y')
+	plt.tight_layout()
+	plt.savefig('Sc' + str(n) + '.png',transparent=True)
+	plt.close('all')
+
+
diff --git a/standalone/idealized/pam-c/set_pamc_cmakevars.sh b/standalone/idealized/pam-c/set_pamc_cmakevars.sh
index c57771b4..e9b289a7 100644
--- a/standalone/idealized/pam-c/set_pamc_cmakevars.sh
+++ b/standalone/idealized/pam-c/set_pamc_cmakevars.sh
@@ -1,6 +1,9 @@
 #!/bin/bash
 
 add_cmake_vars="  -DPAMC_MODEL=extrudedmodel "
-add_cmake_vars+=" -DPAMC_HAMIL=mce_rho       "
+add_cmake_vars+=" -DPAMC_HAMIL=man       "
 add_cmake_vars+=" -DPAMC_THERMO=constkappavirpottemp      "
 add_cmake_vars+=" -DPAMC_IO=serial        "
+
+#ce mce_rho an man
+#idealgaspottemp constkappavirpottemp
diff --git a/standalone/machines/linux_laptop_gnu_mpi_cpu.env b/standalone/machines/linux_laptop_gnu_mpi_cpu.env
index 1b651598..0c989171 100644
--- a/standalone/machines/linux_laptop_gnu_mpi_cpu.env
+++ b/standalone/machines/linux_laptop_gnu_mpi_cpu.env
@@ -19,5 +19,5 @@ export PAM_SCREAM_CXX_FLAGS="-O3;-DHAVE_MPI;-I/home/$USER/YAKL/external"
 export SCREAM_Fortran_FLAGS="-O3;-ffree-line-length-none"
 export SCREAM_CXX_LIBS_DIR="`pwd`/../../mmf_simplified/build_p3_shoc_cxx"
 export PAM_NLEV=50
-export PAM_SCREAM_USE_CXX="ON"
+export PAM_SCREAM_USE_CXX="OFF"
 
diff --git a/standalone/machines/linux_laptop_gnu_mpi_cpu_debug.env b/standalone/machines/linux_laptop_gnu_mpi_cpu_debug.env
index 84972459..9f0e1582 100644
--- a/standalone/machines/linux_laptop_gnu_mpi_cpu_debug.env
+++ b/standalone/machines/linux_laptop_gnu_mpi_cpu_debug.env
@@ -19,5 +19,5 @@ export PAM_SCREAM_CXX_FLAGS="-O0;-DHAVE_MPI;-g;-I/home/$USER/YAKL/external"
 export SCREAM_Fortran_FLAGS="-O0;-g;-ffree-line-length-none"
 export SCREAM_CXX_LIBS_DIR="`pwd`/../../mmf_simplified/build_p3_shoc_cxx"
 export PAM_NLEV=50
-export PAM_SCREAM_USE_CXX="ON"
+export PAM_SCREAM_USE_CXX="OFF"
 
diff --git a/standalone/machines/thatchroof/gpu.env b/standalone/machines/thatchroof/gpu.env
index 29a108ba..d3812cb7 100644
--- a/standalone/machines/thatchroof/gpu.env
+++ b/standalone/machines/thatchroof/gpu.env
@@ -1,5 +1,12 @@
 #!/bin/bash
 
+source $MODULESHOME/init/bash
+module purge
+module load cuda-12.0.0-gcc-11.1.0-5vplmv2    \
+            gcc-12.2.0-gcc-11.1.0-pm3dysf     \
+            cmake-3.26.3-gcc-11.1.0-3ps6d75   \
+            netcdf-c-4.9.2-gcc-11.1.0-mvu6i6y
+
 export YAKL_ARCH=CUDA
 unset CXXFLAGS
 unset FFLAGS
@@ -10,7 +17,7 @@ export CXX=mpic++
 export CC=mpicc
 export FC=mpif90
 
-export YAKL_CUDA_FLAGS="-DHAVE_MPI -O3 -arch sm_86 --use_fast_math -ccbin mpic++ -DYAKL_PROFILE  -I`nc-config --includedir`"
+export YAKL_CUDA_FLAGS="-DHAVE_MPI -O3 -arch sm_86 --use_fast_math -ccbin mpic++ -DYAKL_AUTO_PROFILE  -I`nc-config --includedir`"
 export YAKL_F90_FLAGS="-O2 -ffree-line-length-none -DSCREAM_DOUBLE_PRECISION"
 export PAM_LINK_FLAGS="`nc-config --libs`"
 export SCREAM_HOME="/home/$USER/scream"
@@ -19,5 +26,5 @@ export PAM_SCREAM_CXX_FLAGS="-O3;-DHAVE_MPI;-I/home/$USER/YAKL/external"
 export SCREAM_Fortran_FLAGS="-O3;-ffree-line-length-none"
 export SCREAM_CXX_LIBS_DIR="`pwd`/../../mmf_simplified/build_p3_shoc_cxx"
 export PAM_NLEV=50
-export PAM_SCREAM_USE_CXX="ON"
+export PAM_SCREAM_USE_CXX="OFF"
 
diff --git a/standalone/machines/thatchroof/gpu_debug.env b/standalone/machines/thatchroof/gpu_debug.env
index ab2d283e..1db095f4 100644
--- a/standalone/machines/thatchroof/gpu_debug.env
+++ b/standalone/machines/thatchroof/gpu_debug.env
@@ -1,5 +1,12 @@
 #!/bin/bash
 
+source $MODULESHOME/init/bash
+module purge
+module load cuda-12.0.0-gcc-11.1.0-5vplmv2    \
+            gcc-12.2.0-gcc-11.1.0-pm3dysf     \
+            cmake-3.26.3-gcc-11.1.0-3ps6d75   \
+            netcdf-c-4.9.2-gcc-11.1.0-mvu6i6y
+
 export YAKL_ARCH=CUDA
 unset CXXFLAGS
 unset FFLAGS
@@ -10,7 +17,7 @@ export CXX=mpic++
 export CC=mpicc
 export FC=mpif90
 
-export YAKL_CUDA_FLAGS="-DHAVE_MPI -O0 -g -DYAKL_DEBUG -arch sm_86 -ccbin mpic++ -DYAKL_PROFILE  -I`nc-config --includedir`"
+export YAKL_CUDA_FLAGS="-DHAVE_MPI -O0 -g -DYAKL_DEBUG -DPAM_DEBUG -arch sm_86 -ccbin mpic++ -DYAKL_PROFILE  -I`nc-config --includedir`"
 export YAKL_F90_FLAGS="-O0 -g -ffree-line-length-none -DSCREAM_DOUBLE_PRECISION"
 export PAM_LINK_FLAGS="`nc-config --libs`"
 export SCREAM_HOME="/home/$USER/scream"
@@ -19,5 +26,5 @@ export PAM_SCREAM_CXX_FLAGS="-O3;-DHAVE_MPI;-I/home/$USER/YAKL/external"
 export SCREAM_Fortran_FLAGS="-O3;-ffree-line-length-none"
 export SCREAM_CXX_LIBS_DIR="`pwd`/../../mmf_simplified/build_p3_shoc_cxx"
 export PAM_NLEV=50
-export PAM_SCREAM_USE_CXX="ON"
+export PAM_SCREAM_USE_CXX="OFF"
 
diff --git a/standalone/mmf_simplified/CMakeLists.txt b/standalone/mmf_simplified/CMakeLists.txt
index 779a759f..64b0beef 100644
--- a/standalone/mmf_simplified/CMakeLists.txt
+++ b/standalone/mmf_simplified/CMakeLists.txt
@@ -16,7 +16,6 @@ include_directories(${YAKL_BIN})
 set(YAML_BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../../externals/yaml-cpp ${CMAKE_CURRENT_BINARY_DIR}/yaml-cpp)
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../externals/eigen)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../externals/yaml-cpp/include)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../physics/scream_cxx_interfaces)
 
diff --git a/standalone/mmf_simplified/build/PAM_vcoord_MMF_L60.nc b/standalone/mmf_simplified/build/PAM_vcoord_MMF_L60.nc
deleted file mode 100644
index c4cdfb35..00000000
Binary files a/standalone/mmf_simplified/build/PAM_vcoord_MMF_L60.nc and /dev/null differ
diff --git a/standalone/mmf_simplified/build/cmakescript_pamc.sh b/standalone/mmf_simplified/build/cmakescript_pamc.sh
index f238fa1c..68604481 100755
--- a/standalone/mmf_simplified/build/cmakescript_pamc.sh
+++ b/standalone/mmf_simplified/build/cmakescript_pamc.sh
@@ -1,7 +1,10 @@
-#!/bin/bash
+#!/bin/bash -x
 
 ./cmakeclean.sh
 
+if [[ "$1" != "" ]]; then
+  . $1
+fi
 
 cmake      \
   -DCMAKE_CXX_COMPILER=${CXX}                                     \
@@ -14,16 +17,12 @@ cmake      \
   -DYAKL_F90_FLAGS="${YAKL_F90_FLAGS}"                            \
   -DPAM_LINK_FLAGS="${PAM_LINK_FLAGS}"                            \
   -DYAKL_ARCH="${YAKL_ARCH}"                                      \
-  -DPAM_DYCORE="spam++"                                           \
-  -DPAM_MICRO="p3"                                                \
-  -DPAM_SGS="shoc"                                                \
+  -DPAM_DYCORE="spam"                                           \
+  -DPAM_MICRO="none"                                              \
+  -DPAM_SGS="none"                                                \
   -DPAM_RAD="none"                                                \
-  -DPAMC_MODEL="extrudedmodel"                                    \
-  -DPAMC_HAMIL="man"                                              \
-  -DPAMC_THERMO="constkappavirpottemp"                            \
-  -DPAMC_IO="none"                                                \
   -DPAM_NLEV=${PAM_NLEV}                                          \
   -DSCREAM_CXX_LIBS_DIR=${SCREAM_CXX_LIBS_DIR}                    \
-  -DPAM_SCREAM_USE_CXX=${PAM_SCREAM_USE_CXX}                      \
+  -DPAM_SCREAM_USE_CXX=FALSE                      \
+  ${add_cmake_vars} \
   ..
-
diff --git a/standalone/mmf_simplified/build/vcoords_equal_50_20km.nc b/standalone/mmf_simplified/build/vcoords_equal_50_20km.nc
deleted file mode 100644
index e7e7104d..00000000
Binary files a/standalone/mmf_simplified/build/vcoords_equal_50_20km.nc and /dev/null differ
diff --git a/standalone/mmf_simplified/driver.cpp b/standalone/mmf_simplified/driver.cpp
index c35d1074..040d1dcd 100644
--- a/standalone/mmf_simplified/driver.cpp
+++ b/standalone/mmf_simplified/driver.cpp
@@ -213,9 +213,3 @@ int main(int argc, char** argv) {
   MPI_Finalize();
 }
 
-
-
-
-
-
-
diff --git a/standalone/mmf_simplified/inputs/input_pama.yaml b/standalone/mmf_simplified/inputs/input_pama.yaml
index 54136ea7..daab9ff6 100644
--- a/standalone/mmf_simplified/inputs/input_pama.yaml
+++ b/standalone/mmf_simplified/inputs/input_pama.yaml
@@ -4,10 +4,10 @@
 # simTime  : 86400    # 1 day
 # simTime  : 432000   # 5 days
 # simTime  : 3600   # 30 min
-simTime : 7200
+simTime : 86400
 
 # Number of cells to use in the CRMs
-crm_nx   : 65
+crm_nx   : 250
 crm_ny   : 1
 
 # Number of CRMs
@@ -20,12 +20,6 @@ vcoords  : vcoords_equal_50_20km.nc
 xlen     : 128000
 ylen     : 64000
 
-weno_scalars : true
-
-weno_winds   : true
-
-initData : supercell
-
 # Output filename
 out_prefix  : test_pama
 
@@ -33,9 +27,9 @@ out_prefix  : test_pama
 dt_gcm: 900
 
 # CRM physics time step
-dt_crm_phys: 20.
+dt_crm_phys: 20
 
 # Output frequency in seconds
-out_freq: 200.
+out_freq: 100
 
 
diff --git a/standalone/mmf_simplified/inputs/pama_input_euler3d.yaml b/standalone/mmf_simplified/inputs/pama_input_euler3d.yaml
new file mode 100644
index 00000000..044a84aa
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pama_input_euler3d.yaml
@@ -0,0 +1,47 @@
+---
+# Simulation time in seconds
+simTime  : 1000
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 1
+
+# Vertical height coordinates file
+vcoords  : vcoords_equal_100_20km.nc
+
+# Lengths of the dimensions in meters
+xlen     : 20000
+ylen     : 10000
+
+bc_x   : periodic
+bc_y   : periodic
+bc_z   : wall
+
+weno_scalars : true
+
+weno_winds   : true
+
+# 200km x 100km x 20km  for supercell
+# 20km  x 20km  x 10km  for thermal
+# Data to initialize: thermal, supercell
+initData : thermal
+
+# Output filename
+out_prefix  : test
+
+# CRM physics time step
+dtphys: 0.
+
+# Output frequency in seconds
+out_freq: 0.
+
+# Number of ensembles
+nens : 1
+
+# Balance initial density to avoid initial acoustics
+balance_initial_density: false
+
+# Whether to use the coupler's hydrostasis
+use_coupler_hydrostasis: true
+
+
diff --git a/standalone/mmf_simplified/inputs/pamc_input_extruded_densitycurrent.yaml b/standalone/mmf_simplified/inputs/pamc_input_extruded_densitycurrent.yaml
new file mode 100644
index 00000000..79b3a583
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_extruded_densitycurrent.yaml
@@ -0,0 +1,46 @@
+---
+# Simulation time in number of physics time steps
+simSteps  :  900
+#450
+
+entropicvar_diffusion_coeff: 75
+velocity_diffusion_coeff: 75
+
+# Number of cells to use
+crm_nx       : 256
+crm_ny       : 1
+#crm_nz       : 33
+
+# Vertical height coordinates file
+# vcoords: uniform
+vcoords  : '../pam-c/vcoords/vcoords_equal_33_6400.nc'
+
+# Data to initialize: doublevortex rb mrb
+initData : densitycurrent
+
+# Time steps
+dtphys: 1.0
+crm_per_phys: 1
+# 2
+
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+
+#Coupler output frequency in actual time
+out_freq : 100.0
+
+# Output frequency in number of CRM time steps
+outSteps: 100
+statSteps: 1
+#30
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : true
+nprocx : 1
+nprocy : 1
diff --git a/standalone/mmf_simplified/inputs/pamc_input_extruded_doublevortex.yaml b/standalone/mmf_simplified/inputs/pamc_input_extruded_doublevortex.yaml
new file mode 100644
index 00000000..9cb3d3f3
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_extruded_doublevortex.yaml
@@ -0,0 +1,52 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 200 
+#200 *5 = 1000
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 1
+crm_nz       : 100
+
+# Data to initialize: doublevortex risingbubble moistrisingbubble
+initData : doublevortex
+initTracer0: square
+initTracer1: gaussian
+initTracer2: doublesquare
+initTracer3: square
+initTracer4: gaussian
+initTracer5: doublesquare
+
+initTracerPos0: false
+initTracerPos1: false
+initTracerPos2: false
+initTracerPos3: true
+initTracerPos4: true
+initTracerPos5: true
+
+
+# Time steps
+dtphys: 1000.
+crm_per_phys: 5
+#200 * 5 = 1000 for doublevortex
+ 
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+out_freq: 1000
+
+# Output frequency in number of CRM time steps
+outSteps: 100
+statSteps: 1
+#100
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : true
+nprocx : 1
+nprocy : 1
+
diff --git a/standalone/mmf_simplified/inputs/pamc_input_extruded_gravitywave.yaml b/standalone/mmf_simplified/inputs/pamc_input_extruded_gravitywave.yaml
new file mode 100644
index 00000000..44852e73
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_extruded_gravitywave.yaml
@@ -0,0 +1,42 @@
+# Simulation time in number of physics time steps
+simSteps  : 90 #1280 11520
+#11520 * 5 = 57600
+
+# Number of cells to use
+crm_nx       : 300
+crm_ny       : 1
+crm_nz       : 21
+
+# Vertical height coordinates file
+#vcoords  : vcoords_equal_100_20km.nc
+vcoords: uniform
+
+# Data to initialize: doublevortex rb mrb
+initData : gravitywave
+
+# Time steps
+dtphys: 20
+crm_per_phys: 1
+ 
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+
+#Coupler output frequency in actual time
+out_freq : 100.0
+
+# Output frequency in number of CRM time steps
+outSteps: 90
+statSteps: 1
+#6400
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : false
+nprocx : 1
+nprocy : 1
+
diff --git a/standalone/mmf_simplified/inputs/pamc_input_extruded_largerisingbubble.yaml b/standalone/mmf_simplified/inputs/pamc_input_extruded_largerisingbubble.yaml
new file mode 100644
index 00000000..7b563468
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_extruded_largerisingbubble.yaml
@@ -0,0 +1,49 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 70
+#70 * 20 = 1400
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 1
+crm_nz       : 100
+
+# Vertical height coordinates file
+#vcoords  : vcoords_equal_100_20km.nc
+vcoords: uniform
+
+# Data to initialize: doublevortex rb mrb
+initData : moistlargerisingbubble
+
+# Time steps
+dtphys: 20.0
+crm_per_phys: 80
+#0.16 * 125 = 20.0
+#0.25 * 80 = 20.0
+#Matt recommends 0.8 * min(dx,dz) / 400 for dycore time step, and 20s for physics time step
+#but he has a high-cfl time integrator
+#I am using ssprk3, which has a much smaller maximum stable time step...
+
+# Output filenames
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+
+#Coupler output frequency in actual time
+out_freq : 100.0
+
+# CRM Output frequency in number of CRM time steps
+outSteps: 400
+statSteps: 1
+#100 / 0.16 = 625
+#100 / 0.25 = 400
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : true
+nprocx : 1
+nprocy : 1
+
diff --git a/standalone/mmf_simplified/inputs/pamc_input_extruded_moistrisingbubble.yaml b/standalone/mmf_simplified/inputs/pamc_input_extruded_moistrisingbubble.yaml
new file mode 100644
index 00000000..adfd96d2
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_extruded_moistrisingbubble.yaml
@@ -0,0 +1,44 @@
+---
+# Simulation time in number of gcm time steps
+nsteps_gcm  : 9
+
+# Time steps
+dt_gcm: 100.0
+dt_crm_phys: 2.5
+crm_per_phys: 5
+#0.015625 * 5 = 0.078125 for explicit
+# 0.5*5 = 2.5 for semi-implicit
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 1
+#crm_nz       : 150
+
+# Vertical height coordinates file
+#vcoords  : vcoords_equal_100_20km.nc
+#vcoords: uniform
+vcoords  : '../pam-c/vcoords/vcoords_equal_150_1500.nc'
+
+# Data to initialize: doublevortex rb mrb
+initData : moistrisingbubble
+
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+
+#Output frequency in actual time
+out_freq : 100.0
+stat_freq : 0.5
+# 0.5 for semi-implicit FC or any MAN
+#0.015625 for explicit FC
+
+tstype: ssprk3
+#kgrk4 si ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : false
+nprocx : 1
+nprocy : 1
diff --git a/standalone/mmf_simplified/inputs/pamc_input_extruded_risingbubble.yaml b/standalone/mmf_simplified/inputs/pamc_input_extruded_risingbubble.yaml
new file mode 100644
index 00000000..8ff3bd7c
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_extruded_risingbubble.yaml
@@ -0,0 +1,50 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 360 #1280 11520
+#11520 * 5 = 57600 * 0.015625  = 900s
+#1280 * 5 = 6400 = 100s
+#40 * 5 = 200 * 0.5 = 100s
+#360 * 5 = 1800 * 0.5 = 900s
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 1
+#crm_nz       : 150
+
+# Vertical height coordinates file
+#vcoords  : vcoords_equal_100_20km.nc
+#vcoords: uniform
+vcoords  : '../pam-c/vcoords/vcoords_equal_150_1500.nc'
+
+# Data to initialize: doublevortex rb mrb
+initData : risingbubble
+
+# Time steps
+dtphys: 2.5
+crm_per_phys: 5
+#0.015625 * 5 = 0.078125 for explicit
+# 0.5*5 = 2.5 for semi-implicit
+
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+
+#Coupler output frequency in actual time
+out_freq : 100.0
+
+# Output frequency in number of CRM time steps
+outSteps: 200
+statSteps: 1
+#6400 = 100s for explicit
+#200 = 100s for semi-implicit
+
+tstype: ssprk3
+#kgrk4 si ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : true
+nprocx : 1
+nprocy : 1
diff --git a/standalone/mmf_simplified/inputs/pamc_input_extruded_twobubbles.yaml b/standalone/mmf_simplified/inputs/pamc_input_extruded_twobubbles.yaml
new file mode 100644
index 00000000..f8994741
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_extruded_twobubbles.yaml
@@ -0,0 +1,42 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 240 #1280 11520
+#11520 * 5 = 57600
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 1
+
+# Vertical height coordinates file
+#vcoords  : vcoords_equal_100_20km.nc
+vcoords  : '../pam-c/vcoords/vcoords_equal_100_1000.nc'
+
+# Data to initialize: doublevortex rb mrb
+initData : twobubbles
+
+# Time steps
+dtphys: 2.5
+crm_per_phys: 5
+#0.015625 * 5 = 0.078125
+
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+
+#Coupler output frequency in actual time
+out_freq : 50.0
+
+# Output frequency in number of CRM time steps
+outSteps: 100
+statSteps: 1
+#6400
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : true
+nprocx : 1
+nprocy : 1
diff --git a/standalone/mmf_simplified/inputs/pamc_input_layer_bickleyjet.yaml b/standalone/mmf_simplified/inputs/pamc_input_layer_bickleyjet.yaml
new file mode 100644
index 00000000..36539d5d
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_layer_bickleyjet.yaml
@@ -0,0 +1,36 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 10000
+
+# Number of cells to use
+crm_nx       : 50
+crm_ny       : 50
+crm_nz       : 1
+
+# Data to initialize:
+initData : bickleyjet
+initTracer0: bickleytracer
+initTracerPos0: false
+
+# Time steps
+dtphys: 0.02
+crm_per_phys: 1
+ 
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+out_freq: 1000
+
+# Output frequency in number of CRM time steps
+outSteps: 500
+statSteps: 5
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : true
+nprocx : 1
+nprocy : 1
diff --git a/standalone/mmf_simplified/inputs/pamc_input_layer_doublevortex.yaml b/standalone/mmf_simplified/inputs/pamc_input_layer_doublevortex.yaml
new file mode 100644
index 00000000..5d4867fd
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_layer_doublevortex.yaml
@@ -0,0 +1,52 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 200
+#200 *5 = 1000
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 100
+crm_nz       : 1
+vcoords: uniform
+
+# Data to initialize: doublevortex risingbubble moistrisingbubble
+initData : doublevortex
+initTracer0: square
+initTracer1: gaussian
+initTracer2: doublesquare
+initTracer3: square
+initTracer4: gaussian
+initTracer5: doublesquare
+
+initTracerPos0: false
+initTracerPos1: false
+initTracerPos2: false
+initTracerPos3: true
+initTracerPos4: true
+initTracerPos5: true
+
+# Time steps
+dtphys: 1000.
+crm_per_phys: 5
+#200 * 5 = 1000 for doublevortex
+ 
+# Output filename
+out_prefix  : test
+dycore_out_prefix  : test_dycore
+out_freq: 1000
+
+# Output frequency in number of CRM time steps
+outSteps: 100
+statSteps: 1
+#100
+
+tstype: si
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+inner_mpi : true
+nprocx : 1
+nprocy : 1
+
diff --git a/standalone/mmf_simplified/inputs/pamc_input_layer_risingbubble.yaml b/standalone/mmf_simplified/inputs/pamc_input_layer_risingbubble.yaml
new file mode 100644
index 00000000..728ee32d
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_layer_risingbubble.yaml
@@ -0,0 +1,43 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 11520 #1280
+#11520 * 5 = 57600
+#200 *5 = 1000
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 150
+crm_nz       : 1
+
+# Data to initialize: doublevortex risingbubble moistrisingbubble
+initData : moistrisingbubble
+initTracer1: square
+initTracer2: gaussian
+initTracer3: doublesquare
+initFCTTracer1: square
+initFCTTracer2: gaussian
+initFCTTracer3: doublesquare
+
+# Time steps
+dtphys: 0.078125
+crm_per_phys: 5
+#0.015625 * 5 = 0.078125
+#200 * 5 = 1000 for doublevortex
+ 
+# Output filename
+out_prefix  : test
+
+# Output frequency in number of CRM time steps
+outSteps: 6400
+statSteps: 1
+#100 6400
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+nprocx : 1
+nprocy : 1
+
diff --git a/standalone/mmf_simplified/inputs/pamc_input_layeradvection.yaml b/standalone/mmf_simplified/inputs/pamc_input_layeradvection.yaml
new file mode 100644
index 00000000..27733a8b
--- /dev/null
+++ b/standalone/mmf_simplified/inputs/pamc_input_layeradvection.yaml
@@ -0,0 +1,40 @@
+---
+# Simulation time in number of physics time steps
+simSteps  : 25
+
+# Number of cells to use
+crm_nx       : 100
+crm_ny       : 100
+
+# Data to initialize: doublevortex rb mrb
+initWind : doublevortex
+initT1: square
+initT2: gaussian
+initT3: doublesquare
+initTFCT1: square
+initTFCT2: gaussian
+initTFCT3: doublesquare
+initQ: square
+initQ: gaussian
+initQ: doublesquare
+
+# Time steps
+dtphys: 1000.0
+crm_per_phys: 4
+
+# Output filename
+out_prefix  : test
+
+# Output frequency in number of CRM time steps
+outSteps: 10
+statSteps: 1
+
+tstype: ssprk3
+
+# Number of ensembles
+nens : 1
+
+# Parallel decomposition
+nprocx : 1
+nprocy : 1
+
diff --git a/standalone/mmf_simplified/output.h b/standalone/mmf_simplified/output.h
index 08368b70..f74c6853 100644
--- a/standalone/mmf_simplified/output.h
+++ b/standalone/mmf_simplified/output.h
@@ -19,6 +19,7 @@ inline void output( pam::PamCoupler const &coupler , std::string out_prefix , re
   auto nx = coupler.get_nx();
   auto ny = coupler.get_ny();
   auto nz = coupler.get_nz();
+  auto nens = coupler.get_nens();
 
   auto &dm = coupler.get_data_manager_device_readonly();
 
@@ -39,21 +40,27 @@ inline void output( pam::PamCoupler const &coupler , std::string out_prefix , re
 
         // x-coordinate
         real1d xloc("xloc",nx);
+        real1d xp1loc("xp1loc",nx+1);
         parallel_for( "Spatial.h output 1" , nx , YAKL_LAMBDA (int i) { xloc(i) = (i+0.5)*dx; });
+        parallel_for( "Spatial.h output 2" , nx+1 , YAKL_LAMBDA (int i) { xp1loc(i) = (i)*dx; });
         nc.write(xloc.createHostCopy(),"x",{"x"});
+        nc.write(xp1loc.createHostCopy(),"xp1",{"xp1"});
 
         // y-coordinate
         real1d yloc("yloc",ny);
-        parallel_for( "Spatial.h output 2" , ny , YAKL_LAMBDA (int i) { yloc(i) = (i+0.5)*dy; });
+        real1d yp1loc("yp1loc",ny+1);
+        parallel_for( "Spatial.h output 3" , ny , YAKL_LAMBDA (int i) { yloc(i) = (i+0.5)*dy; });
+        parallel_for( "Spatial.h output 4" , ny+1 , YAKL_LAMBDA (int i) { yp1loc(i) = (i)*dy; });
         nc.write(yloc.createHostCopy(),"y",{"y"});
+        nc.write(yp1loc.createHostCopy(),"yp1",{"yp1"});
 
         // z-coordinate
         auto zint = dm.get<real const,2>("vertical_interface_height");
-        real1d zmid("zmid",nz);
-        parallel_for( "Spatial.h output 3" , nz , YAKL_LAMBDA (int i) {
-          zmid(i) = ( zint(i,0) + zint(i+1,0) ) / 2;
-        });
-        nc.write(zmid.createHostCopy(),"z",{"z"});
+        auto dz = dm.get<real const,2>("vertical_cell_dz");
+        auto zmid = dm.get<real const,2>("vertical_midpoint_height");
+        nc.write(zmid.createHostCopy(),"z", {"z","nens"});
+        nc.write(dz.createHostCopy(),"dz", {"z","nens"});
+        nc.write(zint.createHostCopy(),"zp1", {"zp1","nens"});
 
         // Create time variable
         nc.write1(0._fp,"t",0,"t");
@@ -79,25 +86,31 @@ inline void output( pam::PamCoupler const &coupler , std::string out_prefix , re
       }
 
       // First, write out standard coupler state
-      real3d data("data",nz,ny,nx);
-      parallel_for( SimpleBounds<3>(nz,ny,nx) , YAKL_LAMBDA (int k, int j, int i) { data(k,j,i) = fields(0,k,j,i,0); });
-      nc.write1(data,"density"    ,{"z","y","x"},ulIndex,"t");
-      parallel_for( SimpleBounds<3>(nz,ny,nx) , YAKL_LAMBDA (int k, int j, int i) { data(k,j,i) = fields(1,k,j,i,0); });
-      nc.write1(data,"uvel"       ,{"z","y","x"},ulIndex,"t");
-      parallel_for( SimpleBounds<3>(nz,ny,nx) , YAKL_LAMBDA (int k, int j, int i) { data(k,j,i) = fields(2,k,j,i,0); });
-      nc.write1(data,"vvel"       ,{"z","y","x"},ulIndex,"t");
-      parallel_for( SimpleBounds<3>(nz,ny,nx) , YAKL_LAMBDA (int k, int j, int i) { data(k,j,i) = fields(3,k,j,i,0); });
-      nc.write1(data,"wvel"       ,{"z","y","x"},ulIndex,"t");
-      parallel_for( SimpleBounds<3>(nz,ny,nx) , YAKL_LAMBDA (int k, int j, int i) { data(k,j,i) = fields(4,k,j,i,0); });
-      nc.write1(data,"temperature",{"z","y","x"},ulIndex,"t");
+      real4d data("data",nz,ny,nx,nens);
+      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int n) { data(k,j,i,n) = fields(0,k,j,i,n); });
+      nc.write1(data,"density"    ,{"z","y","x","nens"},ulIndex,"t");
+      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int n) { data(k,j,i,n) = fields(1,k,j,i,n); });
+      nc.write1(data,"uvel"       ,{"z","y","x","nens"},ulIndex,"t");
+      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int n) { data(k,j,i,n) = fields(2,k,j,i,n); });
+      nc.write1(data,"vvel"       ,{"z","y","x","nens"},ulIndex,"t");
+      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int n) { data(k,j,i,n) = fields(3,k,j,i,n); });
+      nc.write1(data,"wvel"       ,{"z","y","x","nens"},ulIndex,"t");
+      parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int n) { data(k,j,i,n) = fields(4,k,j,i,n); });
+      nc.write1(data,"temperature",{"z","y","x","nens"},ulIndex,"t");
       for (int tr=0; tr < num_tracers; tr++) {
-        parallel_for( SimpleBounds<3>(nz,ny,nx) , YAKL_LAMBDA (int k, int j, int i) {
+        parallel_for( SimpleBounds<4>(nz,ny,nx,nens) , YAKL_LAMBDA (int k, int j, int i, int n) {
           // data(k,j,i) = fields(5+tr,k,j,i,0) / (state(idR,hs+k,hs+j,hs+i,0) + hyDensCells(k,0));
-          data(k,j,i) = fields(5+tr,k,j,i,0);
+          data(k,j,i,n) = fields(5+tr,k,j,i,n);
         });
-        nc.write1(data,tracer_names[tr],{"z","y","x"},ulIndex,"t");
+        nc.write1(data,tracer_names[tr],{"z","y","x","nens"},ulIndex,"t");
       }
 
+    auto uvel_stag = dm.get<real const, 4>("uvel_stag");
+    auto vvel_stag = dm.get<real const, 4>("vvel_stag");
+    auto wvel_stag = dm.get<real const, 4>("wvel_stag");
+
+//WRITE OUT OTHER VARIABLES!
+
       // Close the file
       nc.close();
 
@@ -108,5 +121,3 @@ inline void output( pam::PamCoupler const &coupler , std::string out_prefix , re
   }
   MPI_Barrier(MPI_COMM_WORLD);
 }
-
-
diff --git a/standalone/mmf_simplified/pam-c/create_vcoords.txt b/standalone/mmf_simplified/pam-c/create_vcoords.txt
new file mode 100644
index 00000000..0f99894b
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/create_vcoords.txt
@@ -0,0 +1,2 @@
+python3 ../../../utils/generate_vertical_levels.py --function=equal --nlev=100 --ztop=1000 --output=vcoords_equal_100_1000.nc
+
diff --git a/standalone/mmf_simplified/pam-c/debug_run_extruded_script.sh b/standalone/mmf_simplified/pam-c/debug_run_extruded_script.sh
new file mode 100755
index 00000000..b54ccda3
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/debug_run_extruded_script.sh
@@ -0,0 +1,7 @@
+rm *.png *.nc
+cd ../build
+rm driver
+./../pam-c/make_debug_script.sh $3
+mpirun.mpich -n $3 ./driver ../inputs/pamc_input_extruded_$2.yaml
+cd ../pam-c
+mv ../build/*.nc .
diff --git a/standalone/mmf_simplified/pam-c/debug_run_layer_script.sh b/standalone/mmf_simplified/pam-c/debug_run_layer_script.sh
new file mode 100755
index 00000000..2eebc10f
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/debug_run_layer_script.sh
@@ -0,0 +1,7 @@
+rm *.png *.nc
+cd ../build
+rm driver
+./../pam-c/make_debug_script.sh
+mpirun.mpich -n $3 ./driver ../inputs/pamc_input_layer_$2.yaml
+cd ../pam-c
+mv ../build/*.nc .
diff --git a/standalone/mmf_simplified/pam-c/densitycurrent/convergence.py b/standalone/mmf_simplified/pam-c/densitycurrent/convergence.py
new file mode 100644
index 00000000..772f7b8f
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/densitycurrent/convergence.py
@@ -0,0 +1,76 @@
+import yaml
+import os
+import subprocess
+import numpy as np
+from sys import argv
+
+if __name__ == "__main__":
+    nlevels = 2
+    fpath = os.path.dirname(__file__) + "/../../inputs/pamc_input_extruded_densitycurrent.yaml"
+
+    inputfile = yaml.safe_load(open(fpath))
+    
+    if(len(argv) > 1):
+        rundir = argv[1]
+    else:
+        rundir = "dc_convergence"
+    
+    if(len(argv) > 2):
+        base_dt = float(argv[2])
+    else:
+        base_dt = 2
+    
+    if(len(argv) > 3):
+        diffusion_coeff = float(argv[3])
+    else:
+        diffusion_coeff = 75
+    
+    if(len(argv) > 4):
+        driver = argv[4]
+    else:
+        driver = "driver"
+
+    os.mkdir(rundir)
+    os.chdir(rundir)
+
+    timeend = 15 * 60
+    outfreq = 1 * 60
+
+    L = 51.2e3
+    H = 6.4e3
+
+    base_nz = 32
+
+    errs = []
+    dts = []
+    dxs = []
+    for l in range(nlevels):
+        nz = base_nz * 2 ** l + 1
+        nx = 8 * (nz - 1)
+
+        dx = L / nx
+        dz = H / (nz - 1)
+
+        dt = base_dt / (2 ** l)
+        steps = int(np.ceil(timeend / dt))
+        outsteps = int(np.ceil(outfreq / dt))
+
+        ofname = f"output_{nx}_{nz}_"
+
+        inputfile["crm_nx"] = nx
+        inputfile["crm_nz"] = nz
+        inputfile["dtphys"] = dt
+        inputfile["simSteps"] = steps
+        inputfile["outSteps"] = outsteps
+        inputfile["statSteps"] = 1
+        inputfile["dycore_out_prefix"] = ofname
+        inputfile["entropicvar_diffusion_coeff"] = diffusion_coeff
+        inputfile["velocity_diffusion_coeff"] = diffusion_coeff
+
+        ifname = f"input_{nx}_{nz}"
+        yaml.dump(inputfile, open(ifname, "w"))
+
+        print(f"running (nx, nz, dx, dz, dt, steps) = ({nx}, {nz}, {dx}, {dz}, {dt}, {steps})")
+        run = subprocess.run([f"../{driver}", ifname], capture_output=True, text=True)
+        open(ofname + "stdout.txt", "w").write(run.stdout)
+        open(ofname + "stderr.txt", "w").write(run.stderr)
diff --git a/standalone/mmf_simplified/pam-c/gravitywave/convergence.py b/standalone/mmf_simplified/pam-c/gravitywave/convergence.py
new file mode 100644
index 00000000..3da6f66b
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/gravitywave/convergence.py
@@ -0,0 +1,181 @@
+import yaml
+import os
+import subprocess
+import numpy as np
+from sys import argv
+from netCDF4 import Dataset
+
+def create_vcoords(name, H, nz):
+    dz = H / (nz - 1)
+    zint = [k * dz + dz / 2 for k in range(0, nz - 1)]
+    zint = [0] + zint + [H]
+    
+    nc = Dataset(name, "w", format="NETCDF4")
+    nc.z0 = 0
+    nc.ztop = H
+    nc.nlev = len(zint)
+    nc.createDimension("num_interfaces", len(zint))
+    nc_zint = nc.createVariable("vertical_interfaces", "f8", ("num_interfaces",))
+    nc_zint[:] = zint
+
+    nc.close()
+
+
+def compute_Ediss_and_Edisp(a, b):
+    cov_M = np.cov(np.vstack((np.reshape(a, np.size(a)), np.reshape(b, np.size(b)))))
+    sigma_a = np.sqrt(cov_M[0, 0])
+    sigma_b = np.sqrt(cov_M[1, 1])
+    cov_ab = cov_M[0, 1]
+    mean_a = np.mean(a)
+    mean_b = np.mean(b)
+    Ediss = (sigma_a - sigma_b) ** 2 + (mean_a - mean_b) ** 2
+    Edisp = 2 * sigma_a * sigma_b - 2 * cov_ab
+    return (Ediss, Edisp)
+
+def compute_errors(dataset, nx, nz, dx, dz):
+    _rho = 0
+    _S = 1
+    ti = -1
+
+    rho = dataset["dens"][ti, _rho, :, 0 ,:, 0]
+    S = dataset["dens"][ti, _S, :, 0 ,:, 0]
+    w = dataset["w"][ti, 0, :, 0 ,:, 0]
+    T = dataset["T"][ti, 0, :, 0 ,:, 0]
+
+    rho_exact = dataset["dense"][ti, _rho, :, 0 ,:, 0]
+    S_exact = dataset["dense"][ti, _S, :, 0 ,:, 0]
+    w_exact = dataset["we"][ti, 0, :, 0 ,:, 0]
+    T_exact = dataset["Te"][ti, 0, :, 0 ,:, 0]
+
+    rho_b = dataset["densb"][ti, _rho, :, 0 ,:, 0]
+    S_b = dataset["densb"][ti, _S, :, 0 ,:, 0]
+
+    w /= dz
+    w_exact /= dz
+
+    rho /= (dx * dz)
+    rho_exact /= (dx * dz)
+    S /= (dx * dz)
+    S_exact /= (dx * dz)
+    
+    Linf_rho = np.max(np.abs((rho - rho_exact)))
+    L2_rho = np.sqrt(np.sum((rho - rho_exact) ** 2) / (nx * nz))
+    Ediss_rho, Edisp_rho = compute_Ediss_and_Edisp(rho, rho_exact)
+    
+    Linf_S = np.max(np.abs((S - S_exact)))
+    L2_S = np.sqrt(np.sum((S - S_exact) ** 2) / (nx * nz))
+    Ediss_S, Edisp_S = compute_Ediss_and_Edisp(S, S_exact)
+
+    Linf_T = np.max(np.abs((T - T_exact)))
+    L2_T = np.sqrt(np.sum((T - T_exact) ** 2) / (nx * nz))
+    Ediss_T, Edisp_T = compute_Ediss_and_Edisp(T, T_exact)
+
+    Linf_w = np.max(np.abs((w - w_exact)))
+    L2_w = np.sqrt(np.sum((w - w_exact) ** 2) / (nx * nz))
+    Ediss_w, Edisp_w = compute_Ediss_and_Edisp(w, w_exact)
+
+    ret = {"T" : (Linf_T, L2_T, Ediss_T, Edisp_T),
+           "w" : (Linf_w, L2_w, Ediss_w, Edisp_w),
+           "rho" : (Linf_rho, L2_rho, Ediss_rho, Edisp_rho),
+           "S" : (Linf_S, L2_S, Ediss_S, Edisp_S)}
+    return ret
+
+if __name__ == "__main__":
+    nlevels = 2
+    fpath = os.path.dirname(__file__) + "/../../inputs/pamc_input_extruded_gravitywave.yaml"
+
+    inputfile = yaml.safe_load(open(fpath))
+    
+    if(len(argv) > 1):
+        rundir = argv[1]
+    else:
+        rundir = "gw_convergence"
+    
+    if(len(argv) > 2):
+        base_dt = float(argv[2])
+    else:
+        base_dt = 20
+
+    os.mkdir(rundir)
+    os.chdir(rundir)
+
+    timeend = 30 * 60
+
+    L = 300e3
+    H = 10e3
+
+    base_nz = 20
+
+    errs = []
+    dts = []
+    dxs = []
+    for l in range(nlevels):
+        nz = base_nz * 2 ** l + 1
+        nx = 15 * (nz - 1)
+
+        dx = L / nx
+        dz = H / (nz - 1)
+
+        dt = base_dt / (2 ** l)
+        steps = int(np.ceil(timeend / dt))
+        outsteps = steps
+
+        ofname = f"output_{nx}_{nz}_"
+
+        vcoords = "uniform"
+        #vcoords = "uniform_variable"
+        #vcoords = "variable"
+
+        if vcoords == "uniform_variable":
+            vcoords = f"vcoords_{nz}.nc"
+            create_vcoords(vcoords, H, nz)
+            
+        if vcoords == "variable":
+            vcoords = f"vcoords_{nz}.nc"
+            vert_levels_script = "../../../../utils/generate_vertical_levels.py"
+            vert_func = "exp"
+            subprocess.run(["python3", vert_levels_script, f"--nlev={nz-1}", f"--function={vert_func}",
+                            f"--ztop={H}", f"--output={vcoords}"],
+                           capture_output=True, text=True)
+
+
+        inputfile["crm_nx"] = nx
+        inputfile["crm_nz"] = nz
+        inputfile["vcoords"] = vcoords
+        inputfile["dtphys"] = dt
+        inputfile["simSteps"] = steps
+        inputfile["outSteps"] = outsteps
+        inputfile["statSteps"] = 1
+        inputfile["dycore_out_prefix"] = ofname
+
+        ifname = f"input_{nx}_{nz}"
+        yaml.dump(inputfile, open(ifname, "w"))
+
+        print(f"running (nx, nz, dx, dz, dt, steps) = ({nx}, {nz}, {dx}, {dz}, {dt}, {steps})")
+        run = subprocess.run(["../driver", ifname], capture_output=True, text=True)
+        open(ofname + "stdout.txt", "w").write(run.stdout)
+        open(ofname + "stderr.txt", "w").write(run.stderr)
+
+        dataset = Dataset(ofname + "0.nc", "r", format="NETCDF4")
+        err = compute_errors(dataset, nx, nz, dx, dz)
+        dts.append(dt)
+        dxs.append(dx)
+        errs.append(err)
+
+    variables = ("T", "w", "rho", "S")
+    outfiles  = {var : open(f"errors_{var}.txt", "w") for var in variables}
+    header = "{:5} {:8} {:8} {:10} {:10} {:10} {:10} {:10} {:10}\n".format("lev", "dx", "dt", "Linf", "Linf_r", "L2", "L2_r", "Ediss", "Edisp")
+    for var in variables:
+        outfiles[var].write(header)
+        for l in range(nlevels):
+            err = errs[l][var]
+            if l > 0:
+                rate_Linf = np.log2(errs[l-1][var][0] / errs[l][var][0])
+                rate_L2 = np.log2(errs[l-1][var][1] / errs[l][var][1])
+            else:
+                rate_Linf = 0
+                rate_L2 = 0
+            line = f"{l:<5} {dxs[l]:<8.2f} {dts[l]:<8.2f} {err[0]:<10.2e} {rate_Linf:<10.1e} {err[1]:<10.2e} {rate_L2:<10.1e} {err[2]:<10.2e} {err[3]:<10.2e}\n"
+
+            outfiles[var].write(line)
+
diff --git a/standalone/mmf_simplified/pam-c/gravitywave/plot.py b/standalone/mmf_simplified/pam-c/gravitywave/plot.py
new file mode 100644
index 00000000..624ed5a0
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/gravitywave/plot.py
@@ -0,0 +1,92 @@
+from netCDF4 import Dataset
+import matplotlib.pyplot as plt
+import numpy as np
+from sys import argv
+
+plot_difference = False
+fnames = argv[1:]
+
+for fname in fnames:
+    dset = Dataset(fname, "r", format="NETCDF4")
+
+    ti = -1
+    _rho = 0
+    _S = 1
+
+    rho = dset["dens"][ti, _rho, :, 0 ,:, 0]
+    S = dset["dens"][ti, _S, :, 0 ,:, 0]
+    w = dset["w"][ti, 0, :, 0 ,:, 0]
+    T = dset["T"][ti, 0, :, 0 ,:, 0]
+
+    rho_exact = dset["dense"][ti, _rho, :, 0 ,:, 0]
+    S_exact = dset["dense"][ti, _S, :, 0 ,:, 0]
+    w_exact = dset["we"][ti, 0, :, 0 ,:, 0]
+    T_exact = dset["Te"][ti, 0, :, 0 ,:, 0]
+
+    rho_b = dset["densb"][ti, _rho, :, 0 ,:, 0]
+    S_b = dset["densb"][ti, _S, :, 0 ,:, 0]
+
+    nz, nx = rho.shape
+    dz = 10e3 / (nz - 1)
+    dx = 300e3 / nx
+    
+    T = T - 250
+    T_exact = T_exact - 250
+
+    w /= dz
+    w_exact /= dz
+
+    rho -= rho_b
+    rho_exact -= rho_b
+    
+    S -= S_b
+    S_exact -= S_b
+
+    k5000 = int(5000 / dz)
+    w5000 = w[k5000, :]
+    w5000_exact = w_exact[k5000, :]
+
+
+    fig, ax = plt.subplots(1, 1, figsize=(16, 10), sharex = "col")
+    ax.set_title("w at 5km", fontsize=22)
+    ax.plot(w5000, "k")
+    ax.plot(w5000_exact, "k--")
+    plt.savefig(fname.rstrip(".nc") + "_w5km.pdf")
+    plt.clf()
+
+    plotdata = {"T" : (T, T_exact), "w" : (w, w_exact), "rho" : (rho, rho_exact), "S" : (S, S_exact)}
+
+    for vpair in (("T", "w"), ("rho", "S")):
+        v1 = vpair[0]
+        v2 = vpair[1]
+
+        if vpair == ("T", "w") and not plot_difference:
+            sl = 0.0006
+            levels = np.array([sl * (i + 1) for i in range(0, 6)])
+            levels = np.concatenate([-levels[::-1], levels])
+        else:
+            levels = 10
+
+        cmap = "PuOr"
+        fig, ax = plt.subplots(2, 1, figsize=(16, 10), sharex = "col")
+
+        ax[0].set_title(v1, fontsize=22)
+        if plot_difference:
+            ax[0].contour(plotdata[v1][0] - plotdata[v1][1], colors=("k",), levels=levels)
+            cset = ax[0].contourf(plotdata[v1][0] - plotdata[v1][1], cmap=cmap, levels=levels)
+        else:
+            ax[0].contour(plotdata[v1][1], colors=("k",), levels=levels)
+            cset = ax[0].contourf(plotdata[v1][0], cmap=cmap, levels=levels)
+        cbar = plt.colorbar(cset, ax = ax[0], shrink = 1.0)
+
+        ax[1].set_title(v2, fontsize=22)
+        if plot_difference:
+            ax[1].contour(plotdata[v2][0] - plotdata[v2][1], colors=("k",), levels=levels)
+            cset = ax[1].contourf(plotdata[v2][0] - plotdata[v2][1], cmap=cmap, levels=levels)
+        else:
+            ax[1].contour(plotdata[v2][1], colors=("k",), levels=levels)
+            cset = ax[1].contourf(plotdata[v2][0], cmap=cmap, levels=levels)
+        cbar = plt.colorbar(cset, ax = ax[1], shrink = 1.0)
+
+        plt.savefig(fname.rstrip(".nc") + f"_contours_{v1}{v2}.pdf")
+        plt.clf()
diff --git a/standalone/mmf_simplified/pam-c/make_debug_script.sh b/standalone/mmf_simplified/pam-c/make_debug_script.sh
new file mode 100755
index 00000000..2fc51469
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/make_debug_script.sh
@@ -0,0 +1,4 @@
+cd ../build
+source ../../machines/linux_laptop_gnu_mpi_cpu_debug.env
+./cmakescript_pamc.sh ../pam-c/set_pamc_cmakevars.sh
+make -j $1
diff --git a/standalone/mmf_simplified/pam-c/make_script.sh b/standalone/mmf_simplified/pam-c/make_script.sh
new file mode 100755
index 00000000..306b8166
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/make_script.sh
@@ -0,0 +1,4 @@
+cd ../build
+source ../../machines/linux_laptop_gnu_mpi_cpu.env
+./cmakescript_pamc.sh ../pam-c/set_pamc_cmakevars.sh
+make -j $1
diff --git a/standalone/mmf_simplified/pam-c/plot_ACES2023_ce.py b/standalone/mmf_simplified/pam-c/plot_ACES2023_ce.py
new file mode 100644
index 00000000..640f39f7
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/plot_ACES2023_ce.py
@@ -0,0 +1,69 @@
+import xarray as xr
+import numpy as np
+import sys
+import matplotlib.pyplot as plt
+import matplotlib
+from matplotlib.patches import Circle
+
+
+matplotlib.rcParams.update({'font.size': 30})
+core = sys.argv[1]
+test = sys.argv[2]
+DS = xr.open_dataset(core+'-'+test+'/test_dycore0.nc')
+DS.load()
+
+
+
+# STATS PLOTTING
+
+energy = DS.energy
+pv = DS.pv
+mass = DS.mass
+densmax = DS.densmax
+densmin = DS.densmin
+
+plt.figure(figsize=(10,8))
+
+plt.plot( (energy.isel(energy_ndofs=0) - energy.isel(energy_ndofs=0)[0])/energy.isel(energy_ndofs=0)[0]*100.)
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in Energy')
+plt.tight_layout()
+plt.savefig('total_energy.png')
+
+
+plt.figure(figsize=(10,8))
+if core == 'CE':
+	plt.plot( (mass.isel(mass_ndofs=0) - mass.isel(mass_ndofs=0)[0])/mass.isel(mass_ndofs=0)[0]*100., label='rho')
+	plt.plot( (mass.isel(mass_ndofs=1) - mass.isel(mass_ndofs=1)[0])/mass.isel(mass_ndofs=1)[0]*100., label='S')
+elif core == 'AN':
+	plt.plot( (mass.isel(mass_ndofs=0) - mass.isel(mass_ndofs=0)[0])/mass.isel(mass_ndofs=0)[0]*100., label='S')
+plt.legend()
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in Densities')
+plt.tight_layout()
+plt.savefig('total_masses.png')
+
+# VARIABLE PLOTTING
+
+dens = DS.dens
+densl = DS.densl
+QXZl = DS.QXZl
+total_dens = DS.total_dens
+
+nt = DS.dims['t']
+Nlist = [0,nt-1]
+for n in Nlist:
+
+
+	plt.figure(figsize=(10,8))
+	if core == 'CE':
+		plt.contourf(dens.isel(t=n,dens_ndofs=1,dual_ncells_y=0,nens=0) / dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0))
+	if core == 'AN':
+		plt.contourf(dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0) / total_dens.isel(t=n,total_dens_ndofs=0,dual_ncells_y=0,nens=0))	
+	plt.colorbar()
+	plt.xlabel('x')
+	plt.ylabel('y')
+	plt.tight_layout()
+	plt.savefig('Sc' + str(n) + '.png',transparent=True)
+	plt.close('all')
+
diff --git a/standalone/mmf_simplified/pam-c/plot_ACES2023_mce.py b/standalone/mmf_simplified/pam-c/plot_ACES2023_mce.py
new file mode 100644
index 00000000..6fbddb12
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/plot_ACES2023_mce.py
@@ -0,0 +1,117 @@
+import xarray as xr
+import numpy as np
+import sys
+import matplotlib.pyplot as plt
+import matplotlib
+from matplotlib.patches import Circle
+
+
+matplotlib.rcParams.update({'font.size': 30})
+core = sys.argv[1]
+test = sys.argv[2]
+DS = xr.open_dataset(core+'-'+test+'/test_dycore0.nc')
+DS.load()
+
+
+# STATS PLOTTING
+
+energy = DS.energy
+pv = DS.pv
+mass = DS.mass
+densmax = DS.densmax
+densmin = DS.densmin
+
+plt.figure(figsize=(10,8))
+
+plt.plot( (energy.isel(energy_ndofs=0) - energy.isel(energy_ndofs=0)[0])/energy.isel(energy_ndofs=0)[0]*100.)
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in Energy')
+plt.tight_layout()
+plt.savefig('total_energy.png')
+
+masslabels = []
+massnames = []
+nmass = 50
+
+if core == 'MCE':
+	nmass = 3
+	masslabels = [r'$\rho$', 'S', r'$\rho_v$']
+	massnames = ['rho','S','rhov']
+if core == 'MAN':
+	nmass = 2
+	masslabels = ['S', r'$\rho_v$']
+	massnames = ['S','rhov']
+	
+for i,label,name in zip(range(nmass),masslabels,massnames):
+	plt.figure(figsize=(10,8))
+	plt.plot( (mass.isel(mass_ndofs=i) - mass.isel(mass_ndofs=i)[0])/mass.isel(mass_ndofs=i)[0]*100., label=label)
+	plt.legend()
+	plt.xlabel('Nsteps')
+	plt.ylabel('Fractional Change in '+label)
+	plt.tight_layout()
+	plt.savefig('total_'+name+'.png')	
+
+plt.figure(figsize=(10,8))
+if core == 'MCE':
+	plt.plot(densmin.isel(densmin_ndofs=2))
+if core == 'MAN':
+	plt.plot(densmin.isel(densmin_ndofs=1))
+plt.xlabel('Nsteps')
+plt.ylabel('Water Vapor Minima')
+plt.tight_layout()
+plt.savefig('vapor_minima.png')
+
+plt.figure(figsize=(10,8))
+if core == 'MCE':
+	plt.plot(densmax.isel(densmax_ndofs=2))
+if core == 'MAN':
+	plt.plot(densmax.isel(densmax_ndofs=1))
+plt.xlabel('Nsteps')
+plt.ylabel('Water Vapor Maxima')
+plt.tight_layout()
+plt.savefig('vapor_maxima.png')
+
+plt.figure(figsize=(10,8))
+plt.plot( (pv.isel(pv_ndofs=0) - pv.isel(pv_ndofs=0)[0])/pv.isel(pv_ndofs=0)[0]*100.)
+plt.xlabel('Nsteps')
+plt.ylabel('Fractional Change in PV')
+plt.tight_layout()
+plt.savefig('total_PV.png')
+
+# VARIABLE PLOTTING
+
+dens = DS.dens
+densl = DS.densl
+QXZl = DS.QXZl
+total_dens = DS.total_dens
+
+nt = DS.dims['t']
+Nlist = [0,nt-1]
+for n in Nlist:
+
+	plt.figure(figsize=(10,8))
+	if core == 'MCE':
+		plt.contourf(densl.isel(t=n,densl_ndofs=2,primal_ncells_y=0,nens=0))
+	if core == 'MAN':
+		plt.contourf(densl.isel(t=n,densl_ndofs=1,primal_ncells_y=0,nens=0))
+	plt.colorbar()
+	plt.xlabel('x')
+	plt.ylabel('y')
+	plt.tight_layout()
+	plt.savefig('rhovl' + str(n) + '.png',transparent=True)
+	plt.close('all')
+
+
+	plt.figure(figsize=(10,8))
+	if core == 'MCE':
+		plt.contourf(dens.isel(t=n,dens_ndofs=1,dual_ncells_y=0,nens=0) / dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0))
+	if core == 'MAN':
+		plt.contourf(dens.isel(t=n,dens_ndofs=0,dual_ncells_y=0,nens=0) / total_dens.isel(t=n,total_dens_ndofs=0,dual_ncells_y=0,nens=0))
+	plt.colorbar()
+	plt.xlabel('x')
+	plt.ylabel('y')
+	plt.tight_layout()
+	plt.savefig('Sc' + str(n) + '.png',transparent=True)
+	plt.close('all')
+
+
diff --git a/standalone/mmf_simplified/pam-c/plot_extrudedmodel2D.py b/standalone/mmf_simplified/pam-c/plot_extrudedmodel2D.py
new file mode 100644
index 00000000..713974a3
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/plot_extrudedmodel2D.py
@@ -0,0 +1,168 @@
+import xarray as xr
+from plot_helpers import plotvar_scalar2D, plotvar_vector2D, plot_stat, plot_rawstat
+import numpy as np
+import sys
+
+DS = xr.open_dataset('test_dycore0.nc')
+DS.load()
+
+ndensity = DS.dims['dens_ndofs']
+nt = DS.dims['t']
+model = sys.argv[1]
+nens = DS.dims['nens']
+
+plane = 'xz'
+if len(sys.argv) > 2:
+    plane = sys.argv[2]
+print(f"plane = {plane}")
+
+mass = DS.mass
+energy = DS.energy
+pens = DS.pens
+pv = DS.pv
+
+densmax = DS.densmax
+densmin = DS.densmin
+
+if plane == 'xz':
+    plane_idx = DS.dims['dual_ncells_y'] // 2
+if plane == 'yz':
+    plane_idx = DS.dims['dual_ncells_x'] // 2
+if plane == 'xy':
+    plane_idx = DS.dims['dual_nlayers'] // 2
+ndims = DS.dims['v_ndofs']
+print(f"plane_idx = {plane_idx}")
+if len(sys.argv) > 3:
+    plane_idx = int(sys.argv[3])
+
+
+if model == 'swe':
+    dens_names = ['h',]
+    dens_stat_names = ['mass',]
+    nprogdens = 1
+if model == 'tswe':
+    dens_names = ['h','S',]
+    dens_stat_names = ['mass','bouyancy',]
+    nprogdens = 2
+
+if model == 'ce':
+    dens_names = ['rho','Theta',]
+    dens_stat_names = ['mass','entropic_var_density',]
+    nprogdens = 2
+
+if model == 'an':
+    dens_names = ['Theta',]
+    dens_stat_names = ['entropic_var_density',]
+    nprogdens = 1
+
+#THIS IS A LITLTE BROKEN FOR RHOD VARIANTS...
+#probably ok, this is just a quick and dirty plotting script...
+if model == 'mce':
+    dens_names = ['rho','Theta','rho_v', 'rho_l', 'rho_i']    
+    dens_stat_names = ['mass','entropic_var_density','vapor', 'liquid', 'ice']    
+    nprogdens = 5
+
+if model == 'man':
+    dens_names = ['Theta','rho_v', 'rho_l', 'rho_i']    
+    dens_stat_names = ['entropic_var_density','vapor', 'liquid', 'ice']    
+    nprogdens = 4
+    
+for k in range(ndensity-nprogdens):
+    dens_names.append('T'+str(k))
+    dens_stat_names.append('tracer'+str(k))
+
+for n in range(nens):
+    for l,name in zip(range(ndensity), dens_stat_names):
+        plot_stat('total_'+name + '.' + str(n), mass.isel(mass_ndofs=l, nens=n))
+        plot_rawstat('min_' +name + '.' + str(n), densmin.isel(densmin_ndofs=l, nens=n))
+        plot_rawstat('max_' +name + '.' + str(n), densmax.isel(densmax_ndofs=l, nens=n))
+		        
+    plot_rawstat('internal_energy.' + str(n), energy.isel(energy_ndofs=3, nens=n))
+    plot_rawstat('potential_energy.'+ str(n), energy.isel(energy_ndofs=2, nens=n))
+    plot_rawstat('kinetic_energy.'+ str(n), energy.isel(energy_ndofs=1, nens=n))
+    plot_stat('total_energy.'+ str(n), energy.isel(energy_ndofs=0, nens=n))
+
+    plot_stat('total_pens.'+ str(n), pens.isel(pens_ndofs=0, nens=n))
+    plot_stat('total_pv.'+ str(n), pv.isel(pv_ndofs=0, nens=n))
+
+
+Nlist = np.arange(0,nt)
+
+v = DS.v
+w = DS.w
+dens = DS.dens
+total_dens = DS.total_dens
+QHZl = DS.QHZl
+densl = DS.densl
+hs = DS.hs
+coriolishz = DS.coriolishz
+if ndims > 1:
+    QXYl = DS.QXYl
+    coriolisxy = DS.coriolisxy
+
+for n in range(nens):
+
+    if plane == 'xz':
+        plotvar_scalar2D('.'.join(['hs', plane, str(n)]), hs.isel(hs_ndofs=0, dual_ncells_y=plane_idx,nens=n),0)
+        plotvar_scalar2D('.'.join(['coriolisxz', plane, str(n)]), coriolishz.isel(coriolishz_ndofs=0, primal_ncells_y=plane_idx,nens=n),0)
+        if ndims > 1:
+            plotvar_scalar2D('.'.join(['coriolisxz', plane, str(n)]), coriolishz.isel(coriolishz_ndofs=1, primal_ncells_y=plane_idx,nens=n),0)
+            plotvar_scalar2D('.'.join(['coriolisxy', plane, str(n)]), coriolisxy.isel(coriolisxy_ndofs=0, primal_ncells_y=plane_idx,nens=n),0)
+    if plane == 'yz':
+        plotvar_scalar2D('.'.join(['hs', plane, str(n)]), hs.isel(hs_ndofs=0, dual_ncells_x=plane_idx,nens=n),0)
+        plotvar_scalar2D('.'.join(['coriolisxz', plane, str(n)]), coriolishz.isel(coriolishz_ndofs=0, primal_ncells_x=plane_idx,nens=n),0)
+        if ndims > 1:
+            plotvar_scalar2D('.'.join(['coriolisyz', plane, str(n)]), coriolishz.isel(coriolishz_ndofs=1, primal_ncells_x=plane_idx,nens=n),0)
+            plotvar_scalar2D('.'.join(['coriolisxy', plane, str(n)]), coriolisxy.isel(coriolisxy_ndofs=0, primal_ncells_x=plane_idx,nens=n),0)
+    if plane == 'xy':
+        plotvar_scalar2D('.'.join(['hs', plane, str(n)]), hs.isel(hs_ndofs=0, dual_nlayers=plane_idx,nens=n),0)
+        plotvar_scalar2D('.'.join(['coriolisxz', plane, str(n)]), coriolishz.isel(coriolishz_ndofs=0, primal_nlayers=plane_idx,nens=n),0)
+        if ndims > 1:
+            plotvar_scalar2D('.'.join(['coriolisyz', plane, str(n)]), coriolishz.isel(coriolishz_ndofs=1, primal_nlayers=plane_idx,nens=n),0)
+            plotvar_scalar2D('.'.join(['coriolisxy', plane, str(n)]), coriolisxy.isel(coriolisxy_ndofs=0, primal_ninterfaces=plane_idx,nens=n),0)
+
+    for i in Nlist:
+        if plane == 'xz':
+            plotvar_scalar2D('.'.join(['qxz', plane, str(n)]), QHZl.isel(t=i,QHZl_ndofs=0, dual_ncells_y=plane_idx,nens=n),i)
+            plotvar_scalar2D('.'.join(['vx', plane, str(n)]), v.isel(t=i,v_ndofs=0, primal_ncells_y=plane_idx,nens=n),i)
+            plotvar_scalar2D('.'.join(['w', plane, str(n)]), w.isel(t=i,w_ndofs=0, primal_ncells_y=plane_idx,nens=n),i)
+            if ndims > 1:
+                plotvar_scalar2D('.'.join(['vy', plane, str(n)]), v.isel(t=i,v_ndofs=1, primal_ncells_y=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join(['qyz', plane, str(n)]), QHZl.isel(t=i,QHZl_ndofs=1, dual_ncells_y=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join(['qxy', plane, str(n)]), QXYl.isel(t=i,QXYl_ndofs=0, dual_ncells_y=plane_idx,nens=n),i)
+        if plane == 'yz':
+            plotvar_scalar2D('.'.join(['qxz', plane, str(n)]), QHZl.isel(t=i,QHZl_ndofs=0, dual_ncells_x=plane_idx,nens=n),i)
+            plotvar_scalar2D('.'.join(['vx', plane, str(n)]), v.isel(t=i,v_ndofs=0, primal_ncells_x=plane_idx,nens=n),i)
+            plotvar_scalar2D('.'.join(['w', plane, str(n)]), w.isel(t=i,w_ndofs=0, primal_ncells_x=plane_idx,nens=n),i)
+            if ndims > 1:
+                plotvar_scalar2D('.'.join(['vy', plane, str(n)]), v.isel(t=i,v_ndofs=1, primal_ncells_x=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join(['qxy', plane, str(n)]), QXYl.isel(t=i,QXYl_ndofs=0, dual_ncells_x=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join(['qyz', plane, str(n)]), QHZl.isel(t=i,QHZl_ndofs=1, dual_ncells_x=plane_idx,nens=n),i)
+        if plane == 'xy':
+            plotvar_scalar2D('.'.join(['qxz', plane, str(n)]), QHZl.isel(t=i,QHZl_ndofs=0, dual_ninterfaces=plane_idx,nens=n),i)
+            plotvar_scalar2D('.'.join(['vx', plane, str(n)]), v.isel(t=i,v_ndofs=0, primal_ninterfaces=plane_idx,nens=n),i)
+            plotvar_scalar2D('.'.join(['w', plane, str(n)]), w.isel(t=i,w_ndofs=0, primal_nlayers=plane_idx,nens=n),i)
+            if ndims > 1:
+                plotvar_scalar2D('.'.join(['vy', plane, str(n)]), v.isel(t=i,v_ndofs=1, primal_ninterfaces=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join(['qxy', plane, str(n)]), QXYl.isel(t=i,QXYl_ndofs=0, dual_nlayers=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join(['qyz', plane, str(n)]), QHZl.isel(t=i,QHZl_ndofs=1, dual_ncells_x=plane_idx,nens=n),i)
+
+
+        for l,name in zip(range(ndensity), dens_names):
+            if plane == 'xz':
+                plotvar_scalar2D('.'.join([name, plane, str(n)]), dens.isel(t=i,dens_ndofs=l, dual_ncells_y=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join([name, 'l', plane, str(n)]), densl.isel(t=i,densl_ndofs=l, primal_ncells_y=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join([name, 'c', plane, str(n)]), dens.isel(t=i,dens_ndofs=l, dual_ncells_y=plane_idx,nens=n) / total_dens.isel(t=i,total_dens_ndofs=0, dual_ncells_y=plane_idx,nens=n),i)
+
+            if plane == 'yz':
+                plotvar_scalar2D('.'.join([name, plane, str(n)]), dens.isel(t=i,dens_ndofs=l, dual_ncells_x=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join([name, 'l', plane, str(n)]), densl.isel(t=i,densl_ndofs=l, primal_ncells_x=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join([name, 'c', plane, str(n)]), dens.isel(t=i,dens_ndofs=l, dual_ncells_x=plane_idx,nens=n) / total_dens.isel(t=i,total_dens_ndofs=0, dual_ncells_x=plane_idx,nens=n),i)
+            if plane == 'xy':
+                plotvar_scalar2D('.'.join([name, plane, str(n)]), dens.isel(t=i,dens_ndofs=l, dual_nlayers=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join([name, 'l', plane, str(n)]), densl.isel(t=i,densl_ndofs=l, primal_ninterfaces=plane_idx,nens=n),i)
+                plotvar_scalar2D('.'.join([name, 'c', plane, str(n)]), dens.isel(t=i,dens_ndofs=l, dual_nlayers=plane_idx,nens=n) / total_dens.isel(t=i,total_dens_ndofs=0, dual_nlayers=plane_idx,nens=n),i)
+
+
+
+
diff --git a/standalone/mmf_simplified/pam-c/plot_extrudedmodel2D_parallel.py b/standalone/mmf_simplified/pam-c/plot_extrudedmodel2D_parallel.py
new file mode 100644
index 00000000..f0eb7fce
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/plot_extrudedmodel2D_parallel.py
@@ -0,0 +1,117 @@
+import xarray as xr
+from plot_helpers import plotvar_scalar2D, plotvar_vector2D, plot_stat, plot_rawstat
+import numpy as np
+import sys
+import yaml
+
+
+model = sys.argv[1]
+nprocs = int(sys.argv[3])
+with open(sys.argv[2], 'r') as file:
+    config = yaml.safe_load(file)
+    ncbase = config['out_prefix']
+    crm_nx = config['crm_nx']
+    crm_ny = config['crm_ny']
+    nprocx = config['nprocx']
+    nprocy = config['nprocy']
+
+DSarr = []
+varr = []
+warr = []
+densarr = []
+QXZlarr = []
+denslarr = []
+hsarr = []
+coriolisxzarr = []
+
+for i in range(nprocs):
+    DS = xr.open_dataset(ncbase + str(i) + '.nc')
+    DS.load()
+    ndensity = DS.dims['dens_ndofs']
+    nt = DS.dims['t']
+    nens = DS.dims['nens']
+    if (i==0):
+        mass = DS.mass
+        energy = DS.energy
+        pens = DS.pens
+        pv = DS.pv
+        densmax = DS.densmax
+        densmin = DS.densmin
+    varr.append(DS.v)
+    warr.append(DS.w)
+    densarr.append(DS.dens)
+    QXZlarr.append(DS.QXZl)
+    denslarr.append(DS.densl)
+    hsarr.append(DS.hs)
+    coriolisxzarr.append(DS.coriolisxz)
+
+if model == 'swe':
+    dens_names = ['h',]
+    dens_stat_names = ['mass',]
+    nprogdens = 1
+if model == 'tswe':
+    dens_names = ['h','S',]
+    dens_stat_names = ['mass','bouyancy',]
+    nprogdens = 2
+
+if model == 'ce':
+    dens_names = ['rho','Theta',]
+    dens_stat_names = ['mass','entropic_var_density',]
+    nprogdens = 2
+
+#THIS IS A LITLTE BROKEN FOR RHOD VARIANTS...
+#probably ok, this is just a quick and dirty plotting script...
+
+#SUPER WRONG NOW FOR NAMES
+if model == 'mce':
+    dens_names = ['rho','Theta','rho_v', 'rho_l', 'rho_i']    
+    dens_stat_names = ['mass','entropic_var_density','vapor', 'liquid', 'ice']    
+    nprogdens = 5
+    
+for k in range(ndensity-nprogdens):
+    dens_names.append('T'+str(k))
+    dens_stat_names.append('tracer'+str(k))
+
+for n in range(nens):
+    for l,name in zip(range(ndensity), dens_stat_names):
+        plot_stat('total_'+name + '.' + str(n), mass.isel(mass_ndofs=l, nens=n))
+        plot_rawstat('min_' +name + '.' + str(n), densmin.isel(densmin_ndofs=l, nens=n))
+        plot_rawstat('max_' +name + '.' + str(n), densmax.isel(densmax_ndofs=l, nens=n))
+		        
+    plot_rawstat('internal_energy.' + str(n), energy.isel(energy_ndofs=3, nens=n))
+    plot_rawstat('potential_energy.'+ str(n), energy.isel(energy_ndofs=2, nens=n))
+    plot_rawstat('kinetic_energy.'+ str(n), energy.isel(energy_ndofs=1, nens=n))
+    plot_stat('total_energy.'+ str(n), energy.isel(energy_ndofs=0, nens=n))
+
+    plot_stat('total_pens.'+ str(n), pens.isel(pens_ndofs=0, nens=n))
+    plot_stat('total_pv.'+ str(n), pv.isel(pv_ndofs=0, nens=n))
+
+def build_data(
+
+Nlist = np.arange(0,nt)
+
+
+
+for n in range(nens):
+
+    plotvar_scalar2D('hs.'+ str(n), hs.isel(hs_ndofs=0, dual_ncells_y=0,nens=n),0)
+    plotvar_scalar2D('coriolisxz.'+ str(n), coriolisxz.isel(coriolisxz_ndofs=0, primal_ncells_y=0,nens=n),0)
+
+    for i in Nlist:
+        plotvar_scalar2D('qxz.'+ str(n), QXZl.isel(t=i,QXZl_ndofs=0, dual_ncells_y=0,nens=n),i)
+
+        plotvar_scalar2D('v.'+ str(n), v.isel(t=i,v_ndofs=0, primal_ncells_y=0,nens=n),i)
+        plotvar_scalar2D('w.'+ str(n), w.isel(t=i,w_ndofs=0, primal_ncells_y=0,nens=n),i)
+
+
+        for l,name in zip(range(ndensity), dens_names):
+            plotvar_scalar2D(name + '.' + str(n), dens.isel(t=i,dens_ndofs=l, dual_ncells_y=0,nens=n),i)
+            plotvar_scalar2D(name+'l.'+ str(n), densl.isel(t=i,densl_ndofs=l, primal_ncells_y=0,nens=n),i)
+#THIS ASSUMES TOTAL DENSITY IS IN DENS(0)
+            plotvar_scalar2D(name+'c.'+ str(n), dens.isel(t=i,dens_ndofs=l, dual_ncells_y=0,nens=n) / dens.isel(t=i,dens_ndofs=0, dual_ncells_y=0,nens=n),i)
+    #if model in ['tswe','ce','mce']:
+    #        plotvar_scalar2D('thetal', dens.isel(t=i,dens_ndofs=1, dual_ncells_y=0,nens=n) / dens.isel(t=i,dens_ndofs=0, dual_ncells_y=0,nens=n),i)
+
+
+
+
diff --git a/standalone/mmf_simplified/pam-c/plot_helpers.py b/standalone/mmf_simplified/pam-c/plot_helpers.py
new file mode 100644
index 00000000..68470ef4
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/plot_helpers.py
@@ -0,0 +1,61 @@
+import matplotlib.pyplot as plt
+from matplotlib.animation import FuncAnimation
+import numpy as np
+
+def plot_stat(statname, data):
+    plt.figure(figsize=(10,8))
+    plt.plot( (data - data[0])/data[0]*100. )
+    plt.xlabel('Nsteps')
+    plt.ylabel('Fractional Change in ' + statname)
+    plt.tight_layout()
+    plt.savefig(statname + '.png')
+
+def plot_rawstat(statname, data):
+    plt.figure(figsize=(10,8))
+    plt.plot(data)
+    plt.xlabel('Nsteps')
+    plt.ylabel(statname)
+    plt.tight_layout()
+    plt.savefig(statname + '.raw.png')
+
+def plotvar_scalar1D(plotname,vardat,i):
+    plt.figure(figsize=(10,8))
+    plt.plot(vardat)
+    plt.xlabel('x')
+    plt.ylabel(plotname)
+    plt.tight_layout()
+    plt.savefig(plotname + '.' + str(i) + '.png')
+    plt.close('all')
+
+def plotvar_scalar2D(plotname,vardat,i):
+    plt.figure(figsize=(10,8))
+    plt.contourf(vardat)
+    plt.colorbar()
+    plt.contour(vardat)
+    plt.xlabel('x')
+    plt.ylabel('y')
+    plt.tight_layout()
+    plt.savefig(plotname + '.' + str(i) + '.png')
+    plt.close('all')
+
+
+def plotvar_vector2D(plotname,vardat0, vardat1,i):
+    plt.figure(figsize=(10,8))
+    plt.contourf(vardat0)
+    plt.colorbar()
+    plt.contour(vardat0)
+    plt.xlabel('x')
+    plt.ylabel('y')
+    plt.tight_layout()
+    plt.savefig(plotname + '.0.' + str(i) + '.png')
+    plt.close('all')
+
+    plt.figure(figsize=(10,8))
+    plt.contourf(vardat1)
+    plt.colorbar()
+    plt.contour(vardat1)
+    plt.xlabel('x')
+    plt.ylabel('y')
+    plt.tight_layout()
+    plt.savefig(plotname + '.1.' + str(i) + '.png')
+    plt.close('all')
diff --git a/standalone/mmf_simplified/pam-c/plot_layermodel2D.py b/standalone/mmf_simplified/pam-c/plot_layermodel2D.py
new file mode 100644
index 00000000..343e0113
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/plot_layermodel2D.py
@@ -0,0 +1,88 @@
+import xarray as xr
+from plot_helpers import plotvar_scalar2D, plotvar_vector2D, plot_stat, plot_rawstat
+import numpy as np
+import sys
+
+DS = xr.open_dataset('test_dycore0.nc')
+DS.load()
+
+ndensity = DS.dims['dens_ndofs']
+nt = DS.dims['t']
+model = sys.argv[1]
+nens = DS.dims['nens']
+
+mass = DS.mass
+energy = DS.energy
+pens = DS.pens
+pv = DS.pv
+
+densmax = DS.densmax
+densmin = DS.densmin
+
+#maybe mass/tracer stuff gets a list of names of size ndensity/ndensityfct?
+
+if model == 'swe':
+    dens_names = ['h',]
+    densfct_names = []
+    dens_stat_names = ['mass',]
+    nprogdens = 1
+if model == 'tswe':
+    dens_names = ['h','S',]
+    dens_stat_names = ['mass','bouyancy',]
+    nprogdens = 2
+
+if model == 'ce':
+    dens_names = ['rho','Theta',]
+    dens_stat_names = ['mass','entropic_var_density',]
+    nprogdens = 2
+
+#THIS BREAKS FOR RHOD VARIANTS...
+#probably ok, this is just a quick and dirty plotting script...
+# MAYBE MODEL ARGUMENT BECOMES MCED THOUGH?
+if model == 'mce':
+    dens_names = ['rho','Theta','rho_v', 'rho_l', 'rho_i']
+    dens_stat_names = ['mass','entropic_var_density','vapor', 'liquid', 'ice']
+    nprogdens = 5
+    
+for k in range(ndensity-nprogdens):
+    dens_names.append('T'+str(k))
+    dens_stat_names.append('tracer'+str(k))
+
+for n in range(nens):
+    for l,name in zip(range(ndensity), dens_stat_names):
+        plot_stat('total_'+name + '.' + str(n), mass.isel(mass_ndofs=l, nens=n))
+        plot_rawstat('min_' +name + '.' + str(n), densmin.isel(densmin_ndofs=l, nens=n))
+        plot_rawstat('max_' +name + '.' + str(n), densmax.isel(densmax_ndofs=l, nens=n))
+		        
+    plot_rawstat('internal_energy.' + str(n), energy.isel(energy_ndofs=3, nens=n))
+    plot_rawstat('potential_energy.'+ str(n), energy.isel(energy_ndofs=2, nens=n))
+    plot_rawstat('kinetic_energy.'+ str(n), energy.isel(energy_ndofs=1, nens=n))
+    plot_stat('total_energy.'+ str(n), energy.isel(energy_ndofs=0, nens=n))
+
+    plot_stat('total_pens.'+ str(n), pens.isel(pens_ndofs=0, nens=n))
+    plot_stat('total_pv.'+ str(n), pv.isel(pv_ndofs=0, nens=n))
+
+
+Nlist = np.arange(0,nt)
+
+v = DS.v
+dens = DS.dens
+q = DS.q
+densl = DS.densl
+hs = DS.hs
+coriolis = DS.coriolis
+
+for n in range(nens):
+
+    plotvar_scalar2D('hs.' + str(n), hs.isel(hs_ndofs=0,dual_nlayers=0,nens=n),0)
+    plotvar_scalar2D('coriolis.' + str(n), coriolis.isel(coriolis_ndofs=0,primal_nlayers=0,nens=n),0)
+
+    for i in Nlist:
+        plotvar_scalar2D('q.' + str(n), q.isel(t=i,q_ndofs=0,dual_nlayers=0,nens=n),i)
+        plotvar_vector2D('v.' + str(n), v.isel(t=i,v_ndofs=0,primal_nlayers=0,nens=n), v.isel(t=i,v_ndofs=1,primal_nlayers=0,nens=n),i)
+        for l,name in zip(range(ndensity), dens_names):
+            plotvar_scalar2D(name +'.' + str(n), dens.isel(t=i,dens_ndofs=l,dual_nlayers=0,nens=n),i)
+            plotvar_scalar2D(name+'l.' + str(n), densl.isel(t=i,densl_ndofs=l,primal_nlayers=0,nens=n),i)
+#THIS ASSUMES TOTAL DENSITY IS IN DENS(0)
+            plotvar_scalar2D(name+'c.' + str(n), dens.isel(t=i,dens_ndofs=l,dual_nlayers=0,nens=n) / dens.isel(t=i,dens_ndofs=0,dual_nlayers=0,nens=n),i)
+
diff --git a/standalone/mmf_simplified/pam-c/run_extruded_script.sh b/standalone/mmf_simplified/pam-c/run_extruded_script.sh
new file mode 100755
index 00000000..138658fe
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/run_extruded_script.sh
@@ -0,0 +1,9 @@
+rm *.png *.nc
+cd ../build
+rm driver
+./../pam-c/make_script.sh $3
+mpirun.mpich -n $3 ./driver ../inputs/pamc_input_extruded_$2.yaml
+cd ../pam-c
+mv ../build/*.nc .
+python3 plot_extrudedmodel2D.py $1
+#python3 plot_extrudedmodel2D_parallel.py $1 ../inputs/pamc_input_extruded_$2.yaml $3
diff --git a/standalone/mmf_simplified/pam-c/run_layer_script.sh b/standalone/mmf_simplified/pam-c/run_layer_script.sh
new file mode 100755
index 00000000..8bf08767
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/run_layer_script.sh
@@ -0,0 +1,8 @@
+rm *.png *.nc
+cd ../build
+rm driver
+./../pam-c/make_script.sh $3
+mpirun.mpich -n $3 ./driver ../inputs/pamc_input_layer_$2.yaml
+cd ../pam-c
+mv ../build/*.nc .
+python3 plot_layermodel2D.py $1
diff --git a/standalone/mmf_simplified/pam-c/set_pamc_cmakevars.sh b/standalone/mmf_simplified/pam-c/set_pamc_cmakevars.sh
new file mode 100644
index 00000000..e9b289a7
--- /dev/null
+++ b/standalone/mmf_simplified/pam-c/set_pamc_cmakevars.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+add_cmake_vars="  -DPAMC_MODEL=extrudedmodel "
+add_cmake_vars+=" -DPAMC_HAMIL=man       "
+add_cmake_vars+=" -DPAMC_THERMO=constkappavirpottemp      "
+add_cmake_vars+=" -DPAMC_IO=serial        "
+
+#ce mce_rho an man
+#idealgaspottemp constkappavirpottemp
diff --git a/standalone/mmf_simplified/pamc_init.h b/standalone/mmf_simplified/pamc_init.h
new file mode 100644
index 00000000..22342f73
--- /dev/null
+++ b/standalone/mmf_simplified/pamc_init.h
@@ -0,0 +1,53 @@
+#pragma once
+#include "mpi.h"
+
+void partition_domain(std::string inFile, int &crm_nx, int &crm_ny)
+{
+
+  int ierr;
+  int nranks, myrank;
+  int px, py;
+  int i_beg, i_end;
+  int j_beg, j_end;
+  double nper;
+
+  //Read config file
+  YAML::Node config = YAML::LoadFile(inFile);
+  
+  int nprocx = config["nprocx"].as<int>();
+  int nprocy = config["nprocy"].as<int>();
+  ierr = MPI_Comm_size(MPI_COMM_WORLD,&nranks);
+  ierr = MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+  int nx_glob = config["crm_nx"].as<int>();
+  int ny_glob = config["crm_ny"].as<int>();
+    
+  if (!(nprocx * nprocy == nranks)) {endrun("Error: nranks != nprocx * nprocy");}
+
+  //Get my process grid IDs
+  py = floor(myrank / nprocx);
+  px = myrank - nprocx * py;
+
+  //Get my beginning and ending global indices; and domain sizes
+  nper = ((double) nx_glob)/nprocx;
+  i_beg = (int) round( nper* px    );
+  i_end = (int) round( nper*(px+1) )-1;
+  crm_nx = i_end - i_beg + 1;
+
+  nper = ((double) ny_glob)/nprocy;
+  j_beg = (int) round( nper* py    );
+  j_end = (int) round( nper*(py+1) )-1;
+  crm_ny = j_end - j_beg + 1;
+  
+}
+
+void set_domain_sizes(const YAML::Node &config, real &xlen, real &ylen, real &zlen)
+{
+#ifdef _SPAM
+  std::unique_ptr<TestCase> testcase;
+  testcase_from_config(testcase, config);
+  const auto [Lx, Ly, Lz] = testcase->get_domain();
+  xlen = Lx;
+  ylen = Ly;
+  zlen = Lz;
+#endif
+}