From 5c07bf6312e116e2e6a2bb84740afaeeeeef899b Mon Sep 17 00:00:00 2001
From: pdziekan <pdziekanp@gmail.com>
Date: Mon, 27 Apr 2020 17:46:55 +0200
Subject: [PATCH] remove trailing whitespace

---
 libmpdata++/CMakeLists.txt                    |   8 +-
 libmpdata++/bcond/cyclic_1d.hpp               |  10 +-
 libmpdata++/bcond/cyclic_2d.hpp               |  28 +--
 libmpdata++/bcond/cyclic_3d.hpp               |  34 +--
 libmpdata++/bcond/detail/bcond_common.hpp     | 100 ++++----
 libmpdata++/bcond/detail/polar_common.hpp     |   6 +-
 libmpdata++/bcond/detail/remote_common.hpp    |  44 ++--
 libmpdata++/bcond/gndsky_2d.hpp               |  14 +-
 libmpdata++/bcond/gndsky_3d.hpp               |  10 +-
 libmpdata++/bcond/open_1d.hpp                 |   6 +-
 libmpdata++/bcond/open_2d.hpp                 |  42 ++--
 libmpdata++/bcond/open_3d.hpp                 |  64 ++---
 libmpdata++/bcond/polar_2d.hpp                |  38 +--
 libmpdata++/bcond/polar_3d.hpp                |  32 +--
 libmpdata++/bcond/remote_1d.hpp               |  78 +++---
 libmpdata++/bcond/remote_2d.hpp               |  42 ++--
 libmpdata++/bcond/remote_3d.hpp               |  46 ++--
 libmpdata++/bcond/rigid_2d.hpp                |  30 +--
 libmpdata++/bcond/rigid_3d.hpp                |  32 +--
 libmpdata++/bcond/shared.hpp                  |  16 +-
 libmpdata++/blitz.hpp                         |  24 +-
 libmpdata++/concurr/any.hpp                   |  52 ++--
 libmpdata++/concurr/boost_thread.hpp          |  16 +-
 libmpdata++/concurr/cxx11_thread.hpp          |  22 +-
 libmpdata++/concurr/detail/concurr_common.hpp |  76 +++---
 libmpdata++/concurr/detail/distmem.hpp        |  10 +-
 libmpdata++/concurr/detail/sharedmem.hpp      | 186 +++++++-------
 libmpdata++/concurr/detail/timer.hpp          |  14 +-
 libmpdata++/concurr/openmp.hpp                |   6 +-
 libmpdata++/concurr/serial.hpp                |   8 +-
 libmpdata++/concurr/threads.hpp               |   6 +-
 libmpdata++/formulae/arakawa_c.hpp            |  32 +--
 libmpdata++/formulae/common.hpp               |  34 +--
 libmpdata++/formulae/donorcell_formulae.hpp   |  50 ++--
 libmpdata++/formulae/idxperm.hpp              |  22 +-
 libmpdata++/formulae/kahan_sum.hpp            |   4 +-
 .../formulae/mpdata/formulae_mpdata_1d.hpp    |  94 +++----
 .../formulae/mpdata/formulae_mpdata_2d.hpp    | 142 +++++------
 .../formulae/mpdata/formulae_mpdata_3d.hpp    | 108 ++++----
 .../mpdata/formulae_mpdata_common.hpp         |  28 +--
 .../mpdata/formulae_mpdata_dfl_1d.hpp         |  34 +--
 .../mpdata/formulae_mpdata_dfl_2d.hpp         |  50 ++--
 .../mpdata/formulae_mpdata_dfl_3d.hpp         |  52 ++--
 .../mpdata/formulae_mpdata_fct_1d.hpp         |  70 +++---
 .../mpdata/formulae_mpdata_fct_2d.hpp         |  46 ++--
 .../mpdata/formulae_mpdata_fct_3d.hpp         |  12 +-
 .../mpdata/formulae_mpdata_fdiv_1d.hpp        |  62 ++---
 .../mpdata/formulae_mpdata_fdiv_2d.hpp        |  90 +++----
 .../mpdata/formulae_mpdata_fdiv_3d.hpp        | 134 +++++-----
 .../formulae/mpdata/formulae_mpdata_g_1d.hpp  |  16 +-
 .../formulae/mpdata/formulae_mpdata_g_2d.hpp  |  24 +-
 .../formulae/mpdata/formulae_mpdata_g_3d.hpp  |  22 +-
 .../formulae/mpdata/formulae_mpdata_gc_1d.hpp |  42 ++--
 .../formulae/mpdata/formulae_mpdata_gc_2d.hpp |  78 +++---
 .../formulae/mpdata/formulae_mpdata_gc_3d.hpp | 120 ++++-----
 .../mpdata/formulae_mpdata_hot_1d.hpp         |  16 +-
 .../mpdata/formulae_mpdata_hot_2d.hpp         |  38 +--
 .../mpdata/formulae_mpdata_hot_3d.hpp         |   6 +-
 .../mpdata/formulae_mpdata_psi_1d.hpp         |  44 ++--
 .../mpdata/formulae_mpdata_psi_2d.hpp         | 150 +++++------
 .../mpdata/formulae_mpdata_psi_3d.hpp         | 234 +++++++++---------
 libmpdata++/formulae/nabla_formulae.hpp       |  38 +--
 libmpdata++/formulae/stress_formulae.hpp      | 122 ++++-----
 libmpdata++/kahan_reduction.hpp               |  22 +-
 libmpdata++/opts.hpp                          |   2 +-
 libmpdata++/output/detail/output_common.hpp   |  30 +--
 libmpdata++/output/detail/xdmf_writer.hpp     |   4 +-
 libmpdata++/output/gnuplot.hpp                |  98 ++++----
 libmpdata++/output/hdf5.hpp                   |  60 ++---
 libmpdata++/output/hdf5_xdmf.hpp              |  14 +-
 libmpdata++/solvers/boussinesq.hpp            |   4 +-
 .../solvers/detail/boussinesq_common.hpp      |  10 +-
 .../solvers/detail/boussinesq_expl.hpp        |  38 +--
 .../solvers/detail/boussinesq_impl.hpp        |  32 +--
 .../solvers/detail/boussinesq_sgs_common.hpp  |  18 +-
 libmpdata++/solvers/detail/monitor.hpp        |   2 +-
 libmpdata++/solvers/detail/mpdata_common.hpp  |  40 +--
 libmpdata++/solvers/detail/mpdata_fct_1d.hpp  |  22 +-
 libmpdata++/solvers/detail/mpdata_fct_2d.hpp  |  20 +-
 libmpdata++/solvers/detail/mpdata_fct_3d.hpp  |  30 +--
 .../solvers/detail/mpdata_fct_common.hpp      |  14 +-
 libmpdata++/solvers/detail/mpdata_osc_1d.hpp  |  32 +--
 libmpdata++/solvers/detail/mpdata_osc_2d.hpp  |  56 ++---
 libmpdata++/solvers/detail/mpdata_osc_3d.hpp  |  62 ++---
 .../solvers/detail/mpdata_rhs_vip_common.hpp  |  72 +++---
 .../detail/mpdata_rhs_vip_prs_common.hpp      |  34 +--
 .../detail/mpdata_rhs_vip_prs_gcrk.hpp        |  26 +-
 .../solvers/detail/mpdata_rhs_vip_prs_mr.hpp  |  14 +-
 .../solvers/detail/mpdata_rhs_vip_prs_pc.hpp  |  22 +-
 .../detail/mpdata_rhs_vip_prs_sgs_common.hpp  |  22 +-
 .../detail/mpdata_rhs_vip_prs_sgs_dns.hpp     |   6 +-
 .../detail/mpdata_rhs_vip_prs_sgs_smg.hpp     |   8 +-
 libmpdata++/solvers/detail/solver_1d.hpp      |  46 ++--
 libmpdata++/solvers/detail/solver_2d.hpp      | 112 ++++-----
 libmpdata++/solvers/detail/solver_3d.hpp      | 100 ++++----
 libmpdata++/solvers/detail/solver_common.hpp  |  86 +++----
 libmpdata++/solvers/mpdata.hpp                |  22 +-
 libmpdata++/solvers/mpdata_rhs.hpp            |  58 ++---
 libmpdata++/solvers/mpdata_rhs_vip.hpp        | 108 ++++----
 libmpdata++/solvers/mpdata_rhs_vip_prs.hpp    |  28 +--
 .../solvers/mpdata_rhs_vip_prs_sgs.hpp        |  14 +-
 libmpdata++/solvers/shallow_water.hpp         |  48 ++--
 102 files changed, 2275 insertions(+), 2275 deletions(-)

diff --git a/libmpdata++/CMakeLists.txt b/libmpdata++/CMakeLists.txt
index 264ae3a5..6bf368a9 100644
--- a/libmpdata++/CMakeLists.txt
+++ b/libmpdata++/CMakeLists.txt
@@ -5,7 +5,7 @@ project(libmpdata++ CXX)
 # using include() istead of find_package(libmpdata++) to use local CMake code
 # and not the system-installed one
 include(${CMAKE_SOURCE_DIR}/../libmpdata++-config.cmake)
-if (NOT libmpdataxx_FOUND) 
+if (NOT libmpdataxx_FOUND)
   message(FATAL_ERROR "local libmpdata++-config.cmake not found!")
 endif()
 
@@ -13,20 +13,20 @@ endif()
 if (EXISTS "${CMAKE_SOURCE_DIR}/../.git")
   execute_process(COMMAND
    bash -c "git log -1 --format=\"format:#define LIBMPDATAXX_GIT_REVISION \\\"%H\\\"%n\" HEAD > git_revision.hpp"
-   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 
+   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
   )
 endif()
 
 install(
   DIRECTORY
     bcond concurr formulae output solvers
-  DESTINATION 
+  DESTINATION
     include/libmpdata++
 )
 install(
   FILES
     blitz.hpp git_revision.hpp kahan_reduction.hpp opts.hpp
-  DESTINATION 
+  DESTINATION
     include/libmpdata++
 )
 install(
diff --git a/libmpdata++/bcond/cyclic_1d.hpp b/libmpdata++/bcond/cyclic_1d.hpp
index c7fc241f..8671a961 100644
--- a/libmpdata++/bcond/cyclic_1d.hpp
+++ b/libmpdata++/bcond/cyclic_1d.hpp
@@ -11,11 +11,11 @@ namespace libmpdataxx
 {
   namespace bcond
   {
-    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int dim>    
+    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int dim>
     class bcond<       real_t,     halo,         knd,         dir,     n_dims,     dim,
       typename std::enable_if<
-        knd == cyclic && 
-        dir == left   && 
+        knd == cyclic &&
+        dir == left   &&
         n_dims == 1
       >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
@@ -35,7 +35,7 @@ namespace libmpdataxx
       {
         av[0](this->left_halo_vctr) = av[0](this->rght_intr_vctr);
       }
-      
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const bool ad = false)
       {
         fill_halos_vctr_alng(av, ad);
@@ -66,7 +66,7 @@ namespace libmpdataxx
       {
         av[0](this->rght_halo_vctr) = av[0](this->left_intr_vctr);
       }
-      
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const bool ad = false)
       {
         fill_halos_vctr_alng(av, ad);
diff --git a/libmpdata++/bcond/cyclic_2d.hpp b/libmpdata++/bcond/cyclic_2d.hpp
index b9f38c4f..19111088 100644
--- a/libmpdata++/bcond/cyclic_2d.hpp
+++ b/libmpdata++/bcond/cyclic_2d.hpp
@@ -12,14 +12,14 @@ namespace libmpdataxx
   namespace bcond
   {
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == cyclic &&
         dir == left &&
         n_dims == 2
       >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 2>;
       using parent_t::parent_t; // inheriting ctor
@@ -36,7 +36,7 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &) {}
 
       void set_edge_pres(arr_t &, const rng_t &, int) {}
@@ -46,7 +46,7 @@ namespace libmpdataxx
         using namespace idxperm;
         av[d](pi<d>(this->left_halo_vctr, j)) = av[d](pi<d>(this->rght_intr_vctr, j));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j)
       {
         fill_halos_sclr(a, j);
@@ -58,7 +58,7 @@ namespace libmpdataxx
         // the same logic as fill_halos_vctr_alng but have to consider offset ... TODO: find a way to reuse !
         av[d + offset](pi<d>(this->left_halo_vctr, j)) = av[d + offset](pi<d>(this->rght_intr_vctr, j));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const real_t)
       {
         fill_halos_vctr_alng(av, j);
@@ -68,7 +68,7 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j);
       }
-      
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, ad);
@@ -81,14 +81,14 @@ namespace libmpdataxx
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == cyclic &&
         dir == rght &&
         n_dims == 2
       >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 2>;
       using parent_t::parent_t; // inheriting ctor
@@ -100,12 +100,12 @@ namespace libmpdataxx
         using namespace idxperm;
         a(pi<d>(this->rght_halo_sclr, j)) = a(pi<d>(this->left_intr_sclr, j));
       }
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j)
       {
         fill_halos_sclr(a, j);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &) {}
 
       void set_edge_pres(arr_t &, const rng_t &, int) {}
@@ -115,7 +115,7 @@ namespace libmpdataxx
         using namespace idxperm;
         av[d](pi<d>(this->rght_halo_vctr, j)) = av[d](pi<d>(this->left_intr_vctr, j));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j)
       {
         fill_halos_sclr(a, j);
@@ -127,17 +127,17 @@ namespace libmpdataxx
         // the same logic as fill_halos_vctr_alng but have to consider offset ... TODO: find a way to reuse !
         av[d + offset](pi<d>(this->rght_halo_vctr, j)) = av[d + offset](pi<d>(this->left_intr_vctr, j));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const real_t)
       {
         fill_halos_vctr_alng(av, j);
       }
-      
+
       void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)
       {
         fill_halos_sclr(a, j);
       }
-      
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, ad);
diff --git a/libmpdata++/bcond/cyclic_3d.hpp b/libmpdata++/bcond/cyclic_3d.hpp
index 16fb9536..18d4b039 100644
--- a/libmpdata++/bcond/cyclic_3d.hpp
+++ b/libmpdata++/bcond/cyclic_3d.hpp
@@ -12,14 +12,14 @@ namespace libmpdataxx
   namespace bcond
   {
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == cyclic &&
         dir == left &&
         n_dims == 3
       >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 3>;
       using parent_t::parent_t; // inheriting ctor
@@ -29,16 +29,16 @@ namespace libmpdataxx
       void fill_halos_sclr(arr_t &a, const rng_t &j, const rng_t &k, const bool deriv = false)
       {
         using namespace idxperm;
-        a(pi<d>(this->left_halo_sclr, j, k)) = a(pi<d>(this->rght_intr_sclr, j, k)); 
+        a(pi<d>(this->left_halo_sclr, j, k)) = a(pi<d>(this->rght_intr_sclr, j, k));
       }
 
       void fill_halos_pres(arr_t &a, const rng_t &j, const rng_t &k)
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
-      
+
       void set_edge_pres(arr_t &, const rng_t &, const rng_t &, int) {}
 
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
@@ -46,19 +46,19 @@ namespace libmpdataxx
         using namespace idxperm;
         av[d](pi<d>(this->left_halo_vctr, j, k)) = av[d](pi<d>(this->rght_intr_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j, const rng_t &k)
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_sgs_vctr(arrvec_t<arr_t> &av, const arr_t &, const rng_t &j, const rng_t &k, const int offset = 0)
       {
         using namespace idxperm;
         // the same logic as fill_halos_vctr_alng but have to consider offset ... TODO: find a way to reuse !
         av[d + offset](pi<d>(this->left_halo_vctr, j, k)) = av[d + offset](pi<d>(this->rght_intr_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const rng_t &k, const real_t)
       {
         fill_halos_vctr_alng(av, j, k);
@@ -68,7 +68,7 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, k, ad);
@@ -81,7 +81,7 @@ namespace libmpdataxx
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == cyclic &&
         dir == rght &&
@@ -105,29 +105,29 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
-      
+
       void set_edge_pres(arr_t &, const rng_t &, const rng_t &, int) {}
-      
+
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
       {
         using namespace idxperm;
         av[d](pi<d>(this->rght_halo_vctr, j, k)) = av[d](pi<d>(this->left_intr_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j, const rng_t &k)
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_sgs_vctr(arrvec_t<arr_t> &av, const arr_t &, const rng_t &j, const rng_t &k, const int offset = 0)
       {
         using namespace idxperm;
         // the same logic as fill_halos_vctr_alng but have to consider offset ... TODO: find a way to reuse !
         av[d + offset](pi<d>(this->rght_halo_vctr, j, k)) = av[d + offset](pi<d>(this->left_intr_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const rng_t &k, const real_t)
       {
         fill_halos_vctr_alng(av, j, k);
@@ -137,7 +137,7 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, k, ad);
diff --git a/libmpdata++/bcond/detail/bcond_common.hpp b/libmpdata++/bcond/detail/bcond_common.hpp
index e76fa8fa..45ce263f 100644
--- a/libmpdata++/bcond/detail/bcond_common.hpp
+++ b/libmpdata++/bcond/detail/bcond_common.hpp
@@ -14,18 +14,18 @@ namespace libmpdataxx
   {
     using namespace arakawa_c;
 
-    enum bcond_e { null, cyclic, polar, open, rigid, remote, gndsky, custom }; 
+    enum bcond_e { null, cyclic, polar, open, rigid, remote, gndsky, custom };
     enum drctn_e { left, rght };
 
     template<
-      typename real_t, 
+      typename real_t,
       int halo,
       bcond_e knd,
-      drctn_e dir, 
+      drctn_e dir,
       int n_dims,
       int dim,
       class enableif = void
-    > 
+    >
     class bcond
     {};
 
@@ -43,36 +43,36 @@ namespace libmpdataxx
         public:
 
         // 1D
-        virtual void fill_halos_sclr(arr_1d_t &, const bool deriv = false) 
-        { 
-          assert(false && "bcond::fill_halos_sclr() called!"); 
+        virtual void fill_halos_sclr(arr_1d_t &, const bool deriv = false)
+        {
+          assert(false && "bcond::fill_halos_sclr() called!");
         };
 
         virtual void fill_halos_vctr_alng(arrvec_t<arr_1d_t> &, const bool ad = false)
-        { 
-          assert(false && "bcond::fill_halos_vctr() called!"); 
+        {
+          assert(false && "bcond::fill_halos_vctr() called!");
         };
-        
+
         virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 1>> &, const bool ad = false)
         {};
 
         // 2D
-        virtual void fill_halos_sclr(arr_2d_t &, const rng_t &, const bool deriv = false) 
+        virtual void fill_halos_sclr(arr_2d_t &, const rng_t &, const bool deriv = false)
         {
           assert(false && "bcond::fill_halos_sclr() called!");
         };
-        
+
         virtual void fill_halos_pres(arr_2d_t &, const rng_t &)
         {
           assert(false && "bcond::fill_halos_pres() called!");
         };
-        
-        virtual void save_edge_vel(const arr_2d_t &, const rng_t &) 
+
+        virtual void save_edge_vel(const arr_2d_t &, const rng_t &)
         {
           assert(false && "bcond::save_edge_vel() called!");
         };
-        
-        virtual void set_edge_pres(arr_2d_t &, const rng_t &, int) 
+
+        virtual void set_edge_pres(arr_2d_t &, const rng_t &, int)
         {
           assert(false && "bcond::set_edge() called!");
         };
@@ -81,7 +81,7 @@ namespace libmpdataxx
         {
           assert(false && "bcond::fill_halos_vctr_alng() called!");
         };
-        
+
         virtual void fill_halos_sgs_div(arr_2d_t &, const rng_t &)
         {
           assert(false && "bcond::fill_halos_sgs_div() called!");
@@ -91,42 +91,42 @@ namespace libmpdataxx
         {
           assert(false && "bcond::fill_halos_sgs_vctr() called!");
         };
-        
+
         virtual void fill_halos_sgs_tnsr(arrvec_t<arr_2d_t> &, const arr_2d_t &, const arr_2d_t &, const rng_t &, const real_t)
         {
           assert(false && "bcond::fill_halos_sgs_tnsr called!");
         };
 
-        virtual void fill_halos_vctr_nrml(arr_2d_t &, const rng_t &) 
+        virtual void fill_halos_vctr_nrml(arr_2d_t &, const rng_t &)
         {
           assert(false && "bcond::fill_halos_vctr_nrml() called!");
         };
-        
-        virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 2>> &, const rng_t &, const bool ad = false) 
+
+        virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 2>> &, const rng_t &, const bool ad = false)
         {};
 
-        virtual void fill_halos_vctr_nrml_cyclic(blitz::Array<real_t, 2> &, const rng_t &) 
+        virtual void fill_halos_vctr_nrml_cyclic(blitz::Array<real_t, 2> &, const rng_t &)
         {};
-        
-        virtual void fill_halos_flux(arrvec_t<blitz::Array<real_t, 2>> &, const rng_t &) 
+
+        virtual void fill_halos_flux(arrvec_t<blitz::Array<real_t, 2>> &, const rng_t &)
         {};
 
         // 3D
-        virtual void fill_halos_sclr(arr_3d_t &, const rng_t &, const rng_t &, const bool deriv = false) 
+        virtual void fill_halos_sclr(arr_3d_t &, const rng_t &, const rng_t &, const bool deriv = false)
         {
           assert(false && "bcond::fill_halos_sclr() called!");
         };
-        
+
         virtual void fill_halos_pres(arr_3d_t &, const rng_t &, const rng_t &)
         {
           assert(false && "bcond::fill_halos_pres() called!");
         };
-        
-        virtual void save_edge_vel(const arr_3d_t &, const rng_t &, const rng_t &) 
+
+        virtual void save_edge_vel(const arr_3d_t &, const rng_t &, const rng_t &)
         {
           assert(false && "bcond::save_edge_vel() called!");
         };
-        
+
         virtual void set_edge_pres(arr_3d_t &, const rng_t &, const rng_t &, int)
         {
           assert(false && "bcond::set_edge() called!");
@@ -136,12 +136,12 @@ namespace libmpdataxx
         {
           assert(false && "bcond::fill_halos_vctr() called!");
         };
-        
-        virtual void fill_halos_sgs_div(arr_3d_t &, const rng_t &, const rng_t &) 
+
+        virtual void fill_halos_sgs_div(arr_3d_t &, const rng_t &, const rng_t &)
         {
           assert(false && "bcond::fill_halos_sgs_div() called!");
         };
-        
+
         virtual void fill_halos_sgs_vctr(arrvec_t<arr_3d_t> &,
                                             const arr_3d_t &,
                                             const rng_t &,
@@ -150,7 +150,7 @@ namespace libmpdataxx
         {
           assert(false && "bcond::fill_halos_sgs_vctr() called!");
         };
-        
+
         virtual void fill_halos_sgs_tnsr(arrvec_t<arr_3d_t> &,
                                             const arr_3d_t &,
                                             const arr_3d_t &,
@@ -161,25 +161,25 @@ namespace libmpdataxx
           assert(false && "bcond::fill_halos_sgs_tnsr called!");
         };
 
-        virtual void fill_halos_vctr_nrml(arr_3d_t &, const rng_t &, const rng_t &) 
+        virtual void fill_halos_vctr_nrml(arr_3d_t &, const rng_t &, const rng_t &)
         {
           assert(false && "bcond::fill_halos_vctr_nrml() called!");
         };
-        
-        virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 3>> &, const rng_t &, const rng_t &, const bool ad = false) 
+
+        virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 3>> &, const rng_t &, const rng_t &, const bool ad = false)
         {};
 
-        virtual void fill_halos_vctr_nrml_cyclic(blitz::Array<real_t, 3> &, const rng_t &, const rng_t &) 
+        virtual void fill_halos_vctr_nrml_cyclic(blitz::Array<real_t, 3> &, const rng_t &, const rng_t &)
         {};
-        
-        virtual void fill_halos_flux(arrvec_t<blitz::Array<real_t, 3>> &, const rng_t &, const rng_t &) 
+
+        virtual void fill_halos_flux(arrvec_t<blitz::Array<real_t, 3>> &, const rng_t &, const rng_t &)
         {};
 
         protected:
           // sclr
-        int 
+        int
           left_edge_sclr, rght_edge_sclr;
-        rng_t 
+        rng_t
           left_halo_sclr, rght_halo_sclr,
           left_intr_sclr, rght_intr_sclr,
           // vctr
@@ -198,28 +198,28 @@ namespace libmpdataxx
             i.last()
           ),
           left_halo_sclr(
-            (i^halo).first(), 
+            (i^halo).first(),
             (i^halo).first() + halo - 1
           ),
           rght_halo_sclr(
-            (i^halo).last() - (halo - 1), 
+            (i^halo).last() - (halo - 1),
             (i^halo).last()
           ),
           left_intr_sclr(
-            (i^(-1)).first(), 
+            (i^(-1)).first(),
             (i^(-1)).first() + halo - 1
           ),
           rght_intr_sclr(
-            (i^(-1)).last() - (halo - 1), 
+            (i^(-1)).last() - (halo - 1),
             (i^(-1)).last()
           ),
           // vctr
           left_halo_vctr(
-            (i^h^(halo-1)).first(), 
+            (i^h^(halo-1)).first(),
             (i^h^(halo-1)).first() + halo - 1
           ),
           rght_halo_vctr(
-            (i^h^(halo-1)).last() - (halo - 1), 
+            (i^h^(halo-1)).last() - (halo - 1),
             (i^h^(halo-1)).last()
           ),
           left_intr_vctr(
@@ -227,13 +227,13 @@ namespace libmpdataxx
             (i^h^(-1)).first() + halo - 1
           ),
           rght_intr_vctr(
-            (i^h^(-1)).last() - (halo - 1), 
+            (i^h^(-1)).last() - (halo - 1),
             (i^h^(-1)).last()
           )
-        {} 
+        {}
 
         // the one for use in shared
-        bcond_common() 
+        bcond_common()
         {}
       };
     } // namespace detail
diff --git a/libmpdata++/bcond/detail/polar_common.hpp b/libmpdata++/bcond/detail/polar_common.hpp
index 4f3193c2..ebdb8326 100644
--- a/libmpdata++/bcond/detail/polar_common.hpp
+++ b/libmpdata++/bcond/detail/polar_common.hpp
@@ -13,7 +13,7 @@ namespace libmpdataxx
 {
   namespace bcond
   {
-    namespace detail 
+    namespace detail
     {
       using namespace arakawa_c;
 
@@ -36,12 +36,12 @@ namespace libmpdataxx
 
         // ctor
         polar_common(
-          const rng_t &i, 
+          const rng_t &i,
           const std::array<int, n_dims> &grid_size
         ) :
           parent_t(i, grid_size),
           pole((grid_size[0] - 1) / 2)
-        {} 
+        {}
       };
     } // namespace detail
   } // namespace bcond
diff --git a/libmpdata++/bcond/detail/remote_common.hpp b/libmpdata++/bcond/detail/remote_common.hpp
index 5e4ffb7c..2d2cfba7 100644
--- a/libmpdata++/bcond/detail/remote_common.hpp
+++ b/libmpdata++/bcond/detail/remote_common.hpp
@@ -54,12 +54,12 @@ namespace libmpdataxx
 
 #  if !defined(NDEBUG)
           const int debug = 2;
-          std::pair<int, int> buf_rng; 
+          std::pair<int, int> buf_rng;
 #  endif
 #endif
 
         protected:
-        const bool is_cyclic = 
+        const bool is_cyclic =
 #if defined(USE_MPI)
           (dir == left && mpicom.rank() == 0) ||
           (dir == rght && mpicom.rank() == mpicom.size()-1);
@@ -68,14 +68,14 @@ namespace libmpdataxx
 #endif
 
         void send_hlpr(
-          const arr_t &a, 
-          const idx_t &idx_send 
+          const arr_t &a,
+          const idx_t &idx_send
         )
         {
 #if defined(USE_MPI)
-          // distinguishing between left and right messages 
+          // distinguishing between left and right messages
           // (important e.g. with 2 procs and cyclic bc)
-          const int  
+          const int
             msg_send = dir == left ? left : rght;
 
           // arr_send references part of the send buffer that will be used
@@ -88,12 +88,12 @@ namespace libmpdataxx
           {
             // use the pointer+size kind of send instead of serialization of blitz arrays, because
             // serialization caused memory leaks, probably because it breaks blitz reference counting
-            reqs[0] = mpicom.isend(peer, msg_send, buf_send, arr_send.size()); 
+            reqs[0] = mpicom.isend(peer, msg_send, buf_send, arr_send.size());
 
             // sending debug information
 #  if !defined(NDEBUG)
             reqs[1] = mpicom.isend(peer, msg_send ^ debug, std::pair<int,int>(
-              idx_send[0].first(), 
+              idx_send[0].first(),
               idx_send[0].last()
             ));
 #  endif
@@ -104,12 +104,12 @@ namespace libmpdataxx
         };
 
         void recv_hlpr(
-          const arr_t &a, 
+          const arr_t &a,
           const idx_t &idx_recv
         )
         {
 #if defined(USE_MPI)
-          const int  
+          const int
             msg_recv = dir == left ? rght : left;
 
 
@@ -129,8 +129,8 @@ namespace libmpdataxx
         }
 
         void send(
-          const arr_t &a, 
-          const idx_t &idx_send 
+          const arr_t &a,
+          const idx_t &idx_send
         )
         {
 #if defined(USE_MPI)
@@ -144,7 +144,7 @@ namespace libmpdataxx
         }
 
         void recv(
-          const arr_t &a, 
+          const arr_t &a,
           const idx_t &idx_recv
         )
         {
@@ -159,7 +159,7 @@ namespace libmpdataxx
           arr_t arr_recv(buf_recv, a(idx_recv).shape(), blitz::neverDeleteData); // TODO: shape directly from idx_recv
 
           // checking debug information
-          
+
           // positive modulo (grid_size_0 - 1)
 //          auto wrap = [this](int n) {return (n % (grid_size_0 - 1) + grid_size_0 - 1) % (grid_size_0 - 1);};
 //          assert(wrap(buf_rng.first) == wrap(idx_recv[0].first()));
@@ -173,8 +173,8 @@ namespace libmpdataxx
         }
 
         void xchng(
-          const arr_t &a, 
-          const idx_t &idx_send, 
+          const arr_t &a,
+          const idx_t &idx_send,
           const idx_t &idx_recv
         )
         {
@@ -183,13 +183,13 @@ namespace libmpdataxx
           recv_hlpr(a, idx_recv);
 
           // waiting for the transfers to finish
-          boost::mpi::wait_all(reqs.begin(), reqs.end()); 
+          boost::mpi::wait_all(reqs.begin(), reqs.end());
 
           // a blitz handler for the used part of the receive buffer
           arr_t arr_recv(buf_recv, a(idx_recv).shape(), blitz::neverDeleteData);
 
           // checking debug information
-          
+
           // positive modulo (grid_size_0 - 1)
          // auto wrap = [this](int n) {return (n % (grid_size_0 - 1) + grid_size_0 - 1) % (grid_size_0 - 1);};
          // assert(wrap(buf_rng.first) == wrap(idx_recv[0].first()));
@@ -204,8 +204,8 @@ namespace libmpdataxx
 
         public:
 
-        // ctor                                  
-        remote_common(                                                           
+        // ctor
+        remote_common(
           const rng_t &i,
           const std::array<int, n_dims> &grid_size
         ) :
@@ -218,9 +218,9 @@ namespace libmpdataxx
           buf_send = (real_t *) malloc(halo * slice_size * sizeof(real_t));
           buf_recv = (real_t *) malloc(halo * slice_size * sizeof(real_t));
 #endif
-        } 
+        }
 
-        // dtor                                  
+        // dtor
         ~remote_common()
         {
 #if defined(USE_MPI)
diff --git a/libmpdata++/bcond/gndsky_2d.hpp b/libmpdata++/bcond/gndsky_2d.hpp
index 9a451e9f..4a032b78 100644
--- a/libmpdata++/bcond/gndsky_2d.hpp
+++ b/libmpdata++/bcond/gndsky_2d.hpp
@@ -13,14 +13,14 @@ namespace libmpdataxx
   {
     // ground
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == gndsky &&
         dir == left &&
         n_dims == 2
       >::type
     > : public bcond<real_t, halo, rigid, dir, n_dims, d>
-    { 
+    {
       using parent_t = bcond<real_t, halo, rigid, dir, n_dims, d>;
       using arr_t = blitz::Array<real_t, 2>;
       using parent_t::parent_t; // inheriting ctor
@@ -32,7 +32,7 @@ namespace libmpdataxx
         using namespace idxperm;
         a(pi<d>(this->left_edge_sclr - h, j)) = 2 * a(pi<d>(this->left_edge_sclr + h, j)) - a(pi<d>(this->left_edge_sclr + 1 + h, j));
       }
-     
+
       void fill_halos_sgs_vctr(arrvec_t<arr_t> &av, const arr_t &b, const rng_t &j, const int offset = 0)
       {
         using namespace idxperm;
@@ -41,16 +41,16 @@ namespace libmpdataxx
         const auto &a = av[offset + d];
         a(pi<d>(this->left_edge_sclr - h, j)) = 2 * b(pi<d>(this->left_edge_sclr, j)) - a(pi<d>(this->left_edge_sclr + h, j));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &u, const arr_t &div, const rng_t &j, const real_t di)
       {
         using namespace idxperm;
         const auto &a = av[d];
         a(pi<d>(this->left_edge_sclr - h, j)) = 2 * ( ( 3 * u(pi<d>(this->left_edge_sclr + 1, j))
-                                                      - 2 * u(pi<d>(this->left_edge_sclr, j)) 
-                                                      -     u(pi<d>(this->left_edge_sclr + 2, j)) 
+                                                      - 2 * u(pi<d>(this->left_edge_sclr, j))
+                                                      -     u(pi<d>(this->left_edge_sclr + 2, j))
                                                       ) / di
-                                                    - div(pi<d>(this->left_edge_sclr - h, j)) 
+                                                    - div(pi<d>(this->left_edge_sclr - h, j))
                                                     );
       }
     };
diff --git a/libmpdata++/bcond/gndsky_3d.hpp b/libmpdata++/bcond/gndsky_3d.hpp
index e6d5cc96..65499210 100644
--- a/libmpdata++/bcond/gndsky_3d.hpp
+++ b/libmpdata++/bcond/gndsky_3d.hpp
@@ -13,14 +13,14 @@ namespace libmpdataxx
   {
     // ground
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == gndsky &&
         dir == left &&
         n_dims == 3
       >::type
     > : public bcond<real_t, halo, rigid, dir, n_dims, d>
-    { 
+    {
       using parent_t = bcond<real_t, halo, rigid, dir, n_dims, d>;
       using arr_t = blitz::Array<real_t, 3>;
       using parent_t::parent_t; // inheriting ctor
@@ -42,14 +42,14 @@ namespace libmpdataxx
         const auto &a = av[offset + d];
         a(pi<d>(this->left_edge_sclr - h, j, k)) = 2 * b(pi<d>(this->left_edge_sclr, j, k)) - a(pi<d>(this->left_edge_sclr + h, j, k));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &u, const arr_t &div, const rng_t &j, const rng_t &k, const real_t di)
       {
         using namespace idxperm;
         const auto &a = av[d];
         a(pi<d>(this->left_edge_sclr - h, j, k)) = 2 * ( ( 3 * u(pi<d>(this->left_edge_sclr + 1, j, k))
-                                                         - 2 * u(pi<d>(this->left_edge_sclr, j, k)) 
-                                                         -     u(pi<d>(this->left_edge_sclr + 2, j, k)) 
+                                                         - 2 * u(pi<d>(this->left_edge_sclr, j, k))
+                                                         -     u(pi<d>(this->left_edge_sclr + 2, j, k))
                                                          ) / di
                                                        - div(pi<d>(this->left_edge_sclr - h, j, k))
                                                        );
diff --git a/libmpdata++/bcond/open_1d.hpp b/libmpdata++/bcond/open_1d.hpp
index db3ed0bc..ceea4770 100644
--- a/libmpdata++/bcond/open_1d.hpp
+++ b/libmpdata++/bcond/open_1d.hpp
@@ -23,7 +23,7 @@ namespace libmpdataxx
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 1>;
       using parent_t::parent_t; // inheriting ctor
-      
+
       public:
 
       void fill_halos_sclr(arr_t &a, const bool deriv = false)
@@ -56,14 +56,14 @@ namespace libmpdataxx
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 1>;
       using parent_t::parent_t; // inheriting ctor
-      
+
       public:
 
       void fill_halos_sclr(arr_t &a, const bool deriv = false)
       {
         for (int i = this->rght_halo_sclr.first(); i <= this->rght_halo_sclr.last(); ++i)
         {
-          if (deriv) 
+          if (deriv)
             a(rng_t(i, i)) = 0;
           else
             a(rng_t(i, i)) = a(this->rght_edge_sclr);
diff --git a/libmpdata++/bcond/open_2d.hpp b/libmpdata++/bcond/open_2d.hpp
index 662e32c8..8b165446 100644
--- a/libmpdata++/bcond/open_2d.hpp
+++ b/libmpdata++/bcond/open_2d.hpp
@@ -36,11 +36,11 @@ namespace libmpdataxx
         {
           if (deriv)
             a(pi<d>(i, j)) = 0;
-          else 
+          else
             a(pi<d>(i, j)) = a(pi<d>(this->left_edge_sclr, j)); // zero-gradient condition for scalar
         }
       }
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
@@ -48,7 +48,7 @@ namespace libmpdataxx
         a(pi<d>(this->left_halo_sclr.last(), j)) = 2 * a(pi<d>(this->left_edge_sclr,     j))
                                                      - a(pi<d>(this->left_edge_sclr + 1, j));
       }
-      
+
       void save_edge_vel(const arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
@@ -58,12 +58,12 @@ namespace libmpdataxx
         if(d != 0) edge_velocity.reindexSelf({a.lbound(0), 0});
         edge_velocity(pi<d>(0, j)) = a(pi<d>(this->left_edge_sclr, j));
       }
-      
+
       void set_edge_pres(arr_t &a, const rng_t &j, int sign)
       {
         using namespace idxperm;
         a(pi<d>(this->left_edge_sclr, j)) = sign * edge_velocity(pi<d>(0, j));
-        
+
         if (halo > 1)
         {
           a(pi<d>(this->left_halo_sclr.last() - 1, j)) =   3 * a(pi<d>(this->left_edge_sclr,     j))
@@ -75,19 +75,19 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         const int i = this->left_edge_sclr;
-   
+
         // if executed first (d=0) this could contain NaNs
-        if (d == 0) 
+        if (d == 0)
         {
           av[d+1](pi<d>(i, (j-h).first())) = 0;
           av[d+1](pi<d>(i, (j+h).last())) = 0;
         }
-       
+
         // zero-divergence condition
         for (int ii = this->left_halo_vctr.first(); ii <= this->left_halo_vctr.last() - (ad ? 1 : 0); ++ii)
         {
-          av[d](pi<d>(ii, j)) = 
-            av[d](pi<d>(i+h, j)) 
+          av[d](pi<d>(ii, j)) =
+            av[d](pi<d>(i+h, j))
             -(
               av[d+1](pi<d>(i, j-h)) -
               av[d+1](pi<d>(i, j+h))
@@ -100,7 +100,7 @@ namespace libmpdataxx
         using namespace idxperm;
         // note intentional sclr
         for (int i = this->left_halo_sclr.first(); i <= this->left_halo_sclr.last(); ++i)
-          a(pi<d>(i, j)) = 0; 
+          a(pi<d>(i, j)) = 0;
       }
     };
 
@@ -116,10 +116,10 @@ namespace libmpdataxx
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 2>;
       using parent_t::parent_t; // inheriting ctor
-      
+
       // holds saved initial value of edge velocity
       arr_t edge_velocity;
-      
+
       public:
 
       void fill_halos_sclr(arr_t &a, const rng_t &j, const bool deriv = false)
@@ -133,7 +133,7 @@ namespace libmpdataxx
             a(pi<d>(i, j)) = a(pi<d>(this->rght_edge_sclr, j)); // zero gradient for scalar
         }
       }
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
@@ -147,7 +147,7 @@ namespace libmpdataxx
         }
 
       }
-      
+
       void save_edge_vel(const arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
@@ -157,7 +157,7 @@ namespace libmpdataxx
         if(d != 0) edge_velocity.reindexSelf({a.lbound(0), 0});
         edge_velocity(pi<d>(0, j)) = a(pi<d>(this->rght_edge_sclr, j));
       }
-      
+
       void set_edge_pres(arr_t &a, const rng_t &j, int sign)
       {
         using namespace idxperm;
@@ -170,29 +170,29 @@ namespace libmpdataxx
         const int i = this->rght_edge_sclr;
 
         // if executed first (d=0) this could contain NaNs
-        if (d == 0) 
+        if (d == 0)
         {
           av[d+1](pi<d>(i, (j-h).first())) = 0;
           av[d+1](pi<d>(i, (j+h).last())) = 0;
         }
-       
+
         // zero-divergence condition
         for (int ii = this->rght_halo_vctr.first() + (ad ? 1 : 0); ii <= this->rght_halo_vctr.last(); ++ii)
         {
-          av[d](pi<d>(ii, j)) = 
+          av[d](pi<d>(ii, j)) =
             av[d](pi<d>(i-h, j)) + (
               av[d+1](pi<d>(i, j-h)) -
               av[d+1](pi<d>(i, j+h))
             );
         }
       }
-      
+
       void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
         // note intentional sclr
         for (int i = this->rght_halo_sclr.first(); i <= this->rght_halo_sclr.last(); ++i)
-          a(pi<d>(i, j)) = 0; 
+          a(pi<d>(i, j)) = 0;
       }
     };
   } // namespace bcond
diff --git a/libmpdata++/bcond/open_3d.hpp b/libmpdata++/bcond/open_3d.hpp
index fcc7ef20..b621748a 100644
--- a/libmpdata++/bcond/open_3d.hpp
+++ b/libmpdata++/bcond/open_3d.hpp
@@ -12,18 +12,18 @@ namespace libmpdataxx
   namespace bcond
   {
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == open &&
         dir == left &&
         n_dims == 3
-      >::type 
+      >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 3>;
       using parent_t::parent_t; // inheriting ctor
-      
+
       // holds saved initial value of edge velocity
       arr_t edge_velocity;
 
@@ -40,7 +40,7 @@ namespace libmpdataxx
             a(pi<d>(i, j, k)) = a(pi<d>(this->left_edge_sclr, j, k));
         }
       }
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
@@ -53,7 +53,7 @@ namespace libmpdataxx
                                                             - 2 * a(pi<d>(this->left_edge_sclr + 1, j, k));
         }
       }
-      
+
       void save_edge_vel(const arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
@@ -63,7 +63,7 @@ namespace libmpdataxx
         if(d != 0) edge_velocity.reindexSelf({a.lbound(0), 0, 0});
         edge_velocity(pi<d>(0, j, k)) = a(pi<d>(this->left_edge_sclr, j, k));
       }
-      
+
       void set_edge_pres(arr_t &a, const rng_t &j, const rng_t &k, int sign)
       {
         using namespace idxperm;
@@ -86,7 +86,7 @@ namespace libmpdataxx
           av[d+1](pi<d>(i, (j-h).first(), k)) = 0;
           av[d+1](pi<d>(i, (j+h).last(),  k)) = 0;
 
-          case 2: 
+          case 2:
           break;
 
           default: assert(false);
@@ -101,15 +101,15 @@ namespace libmpdataxx
         // zero-divergence condition
         for (int ii = this->left_halo_vctr.first(); ii <= this->left_halo_vctr.last() - (ad ? 1 : 0); ++ii)
         {
-          av[d](pi<d>(ii, j, k)) = 
-            av[d](pi<d>(i+h, j, k)) 
+          av[d](pi<d>(ii, j, k)) =
+            av[d](pi<d>(i+h, j, k))
             -(
-              av[d+1](pi<d>(i, j-h, k)) - 
-              av[d+1](pi<d>(i, j+h, k))   
-            ) 
+              av[d+1](pi<d>(i, j-h, k)) -
+              av[d+1](pi<d>(i, j+h, k))
+            )
             -(
               av[d+2](pi<d>(i, j, k-h)) -
-              av[d+2](pi<d>(i, j, k+h)) 
+              av[d+2](pi<d>(i, j, k+h))
             );
         }
       }
@@ -119,26 +119,26 @@ namespace libmpdataxx
         using namespace idxperm;
         // note intentional sclr
         for (int i = this->left_halo_sclr.first(); i <= this->left_halo_sclr.last(); ++i)
-          a(pi<d>(i, j, k)) = 0; 
+          a(pi<d>(i, j, k)) = 0;
       }
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == open &&
         dir == rght &&
         n_dims == 3
-      >::type 
+      >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 3>;
       using parent_t::parent_t; // inheriting ctor
-      
+
       // holds saved initial value of edge velocity
       arr_t edge_velocity;
-      
+
       public:
 
       void fill_halos_sclr(arr_t &a, const rng_t &j, const rng_t &k, const bool deriv = false)
@@ -152,7 +152,7 @@ namespace libmpdataxx
             a(pi<d>(i, j, k)) = a(pi<d>(this->rght_edge_sclr, j, k));
         }
       }
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
@@ -166,7 +166,7 @@ namespace libmpdataxx
                                                              - 2 * a(pi<d>(this->rght_edge_sclr - 1, j, k));
         }
       }
-      
+
       void save_edge_vel(const arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
@@ -176,7 +176,7 @@ namespace libmpdataxx
         if(d != 0) edge_velocity.reindexSelf({a.lbound(0), 0, 0});
         edge_velocity(pi<d>(0, j, k)) = a(pi<d>(this->rght_edge_sclr, j, k));
       }
-      
+
       void set_edge_pres(arr_t &a, const rng_t &j, const rng_t &k, int sign)
       {
         using namespace idxperm;
@@ -198,7 +198,7 @@ namespace libmpdataxx
           av[d+1](pi<d>(i, (j-h).first(), k)) = 0;
           av[d+1](pi<d>(i, (j+h).last(),  k)) = 0;
 
-          case 2: 
+          case 2:
           break;
 
           default: assert(false);
@@ -212,25 +212,25 @@ namespace libmpdataxx
 
         for (int ii = this->rght_halo_vctr.first() + (ad ? 1 : 0); ii <= this->rght_halo_vctr.last(); ++ii)
         {
-          av[d](pi<d>(ii, j, k)) = 
-            av[d](pi<d>(i-h, j, k)) 
+          av[d](pi<d>(ii, j, k)) =
+            av[d](pi<d>(i-h, j, k))
             +(
-              av[d+1](pi<d>(i, j-h, k)) - 
-              av[d+1](pi<d>(i, j+h, k)) 
+              av[d+1](pi<d>(i, j-h, k)) -
+              av[d+1](pi<d>(i, j+h, k))
             )
             +(
-              av[d+2](pi<d>(i, j, k-h)) - 
-              av[d+2](pi<d>(i, j, k+h)) 
+              av[d+2](pi<d>(i, j, k-h)) -
+              av[d+2](pi<d>(i, j, k+h))
             );
         }
       }
-      
+
       void fill_halos_vctr_nrml(arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
         // note intentional sclr
         for (int i = this->rght_halo_sclr.first(); i <= this->rght_halo_sclr.last(); ++i)
-          a(pi<d>(i, j, k)) = 0; 
+          a(pi<d>(i, j, k)) = 0;
       }
     };
   } // namespace bcond
diff --git a/libmpdata++/bcond/polar_2d.hpp b/libmpdata++/bcond/polar_2d.hpp
index bd4a6d02..0d87112f 100644
--- a/libmpdata++/bcond/polar_2d.hpp
+++ b/libmpdata++/bcond/polar_2d.hpp
@@ -12,14 +12,14 @@ namespace libmpdataxx
   namespace bcond
   {
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == polar &&
         dir == left &&
         n_dims == 2
-      >::type 
+      >::type
     > : public detail::polar_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::polar_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 2>;
       using parent_t::parent_t; // inheriting ctor
@@ -31,15 +31,15 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
             a(pi<d>(this->left_halo_sclr.last() - i,
-                    jj)) 
+                    jj))
             =
             a(pi<d>(this->left_edge_sclr + i,
                     this->polar_neighbours(jj)));
-              
+
           }
         }
       }
@@ -53,7 +53,7 @@ namespace libmpdataxx
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
             av[d](pi<d>(this->left_halo_vctr.first(), jj))
-            = 
+            =
             av[d](pi<d>(this->left_edge_sclr + h, this->polar_neighbours(jj)));
           }
         }
@@ -63,29 +63,29 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
             a(pi<d>(this->left_halo_sclr.first() + i,
-                    jj + h)) 
+                    jj + h))
             =
             a(pi<d>(this->left_intr_vctr.last() - i,
                     this->polar_neighbours(jj) + h));
-              
+
           }
         }
       }
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == polar &&
         dir == rght &&
         n_dims == 2
       >::type
     > : public detail::polar_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::polar_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 2>;
       using parent_t::parent_t; // inheriting ctor
@@ -98,15 +98,15 @@ namespace libmpdataxx
         using namespace idxperm;
 
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
             a(pi<d>(this->rght_halo_sclr.first() + i,
-                    jj)) 
+                    jj))
             =
             a(pi<d>(this->rght_edge_sclr - i,
                     this->polar_neighbours(jj)));
-              
+
           }
         }
       }
@@ -125,20 +125,20 @@ namespace libmpdataxx
           }
         }
       }
-      
+
       void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
             a(pi<d>(this->rght_halo_sclr.first() + i,
-                    jj + h)) 
+                    jj + h))
             =
             a(pi<d>(this->rght_intr_vctr.last() - i,
                     this->polar_neighbours(jj) + h));
-              
+
           }
         }
       }
diff --git a/libmpdata++/bcond/polar_3d.hpp b/libmpdata++/bcond/polar_3d.hpp
index 5d8d19d3..96b1ae4c 100644
--- a/libmpdata++/bcond/polar_3d.hpp
+++ b/libmpdata++/bcond/polar_3d.hpp
@@ -12,14 +12,14 @@ namespace libmpdataxx
   namespace bcond
   {
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == polar &&
         dir == left &&
         n_dims == 3
-      >::type 
+      >::type
     > : public detail::polar_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::polar_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 3>;
       using parent_t::parent_t; // inheriting ctor
@@ -31,13 +31,13 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
-            a(pi<d>(this->left_halo_sclr.last() - i, jj, k)) 
+            a(pi<d>(this->left_halo_sclr.last() - i, jj, k))
             =
             a(pi<d>(this->left_edge_sclr + i, this->polar_neighbours(jj), k));
-              
+
           }
         }
       }
@@ -51,7 +51,7 @@ namespace libmpdataxx
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
             av[d](pi<d>(this->left_halo_vctr.first(), jj, k))
-            = 
+            =
             av[d](pi<d>(this->left_edge_sclr + h, this->polar_neighbours(jj), k));
           }
         }
@@ -61,10 +61,10 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
-            a(pi<d>(this->left_halo_sclr.first() + i, jj + h, k)) 
+            a(pi<d>(this->left_halo_sclr.first() + i, jj + h, k))
             =
             a(pi<d>(this->left_intr_vctr.last() - i, this->polar_neighbours(jj) + h, k));
           }
@@ -73,14 +73,14 @@ namespace libmpdataxx
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == polar &&
         dir == rght &&
         n_dims == 3
       >::type
     > : public detail::polar_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::polar_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, 3>;
       using parent_t::parent_t; // inheriting ctor
@@ -93,10 +93,10 @@ namespace libmpdataxx
         using namespace idxperm;
 
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
-            a(pi<d>(this->rght_halo_sclr.first() + i, jj, k)) 
+            a(pi<d>(this->rght_halo_sclr.first() + i, jj, k))
             =
             a(pi<d>(this->rght_edge_sclr - i, this->polar_neighbours(jj), k));
           }
@@ -117,15 +117,15 @@ namespace libmpdataxx
           }
         }
       }
-      
+
       void fill_halos_vctr_nrml(arr_t &a, const rng_t &j,  const rng_t &k)
       {
         using namespace idxperm;
         for (int i = 0; i < halo; ++i)
-        { 
+        {
           for (int jj = j.first(); jj <= j.last(); jj++)
           {
-            a(pi<d>(this->rght_halo_sclr.first() + i, jj + h, k)) 
+            a(pi<d>(this->rght_halo_sclr.first() + i, jj + h, k))
             =
             a(pi<d>(this->rght_intr_vctr.last() - i, this->polar_neighbours(jj) + h, k));
           }
diff --git a/libmpdata++/bcond/remote_1d.hpp b/libmpdata++/bcond/remote_1d.hpp
index abae9866..616faa78 100644
--- a/libmpdata++/bcond/remote_1d.hpp
+++ b/libmpdata++/bcond/remote_1d.hpp
@@ -11,11 +11,11 @@ namespace libmpdataxx
 {
   namespace bcond
   {
-    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int dim>    
+    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int dim>
     class bcond<       real_t,     halo,         knd,         dir,     n_dims,     dim,
       typename std::enable_if<
-        knd == remote && 
-        dir == left   && 
+        knd == remote &&
+        dir == left   &&
         n_dims == 1
       >::type
     > : public detail::remote_common<real_t, halo, dir, n_dims>
@@ -39,26 +39,26 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a);
       }
-      
+
       void save_edge_vel(const arr_t &) {}
 
       void set_edge_pres(arr_t &, int) {}
 
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const bool ad = false)
       {
-        if(!this->is_cyclic) 
+        if(!this->is_cyclic)
         {
           if(halo == 1)
-            this->send(av[0], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off))); 
+            this->send(av[0], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)));
           else
             // processes fill vectors to the left of their domain
-            this->xchng(av[0], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t((this->left_halo_vctr^h)^(-1)))); 
+            this->xchng(av[0], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t((this->left_halo_vctr^h)^(-1))));
         }
-        else 
-          // cyclic should communicate both ways 
-          this->xchng(av[0], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->left_halo_vctr))); 
+        else
+          // cyclic should communicate both ways
+          this->xchng(av[0], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->left_halo_vctr)));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a)
       {
         fill_halos_sclr(a);
@@ -68,29 +68,29 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         // the same logic as fill_halos_vctr_alng but have to consider offset ... TODO: find a way to reuse !
-        if(!this->is_cyclic) 
+        if(!this->is_cyclic)
         {
           if(halo == 1)
-            this->send(av[0 + offset], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off))); 
+            this->send(av[0 + offset], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)));
           else
             // processes fill vectors to the left of their domain
-            this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t((this->left_halo_vctr^h)^(-1)))); 
+            this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t((this->left_halo_vctr^h)^(-1))));
         }
-        else 
-          // cyclic should communicate both ways 
-          this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->left_halo_vctr))); 
+        else
+          // cyclic should communicate both ways
+          this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(this->left_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->left_halo_vctr)));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const real_t)
       {
         fill_halos_vctr_alng(av);
       }
 
-      void fill_halos_vctr_nrml(arr_t &a)                 
-      {                                                                         
-        fill_halos_sclr(a);                                                  
-      }  
-      
+      void fill_halos_vctr_nrml(arr_t &a)
+      {
+        fill_halos_sclr(a);
+      }
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const bool ad = false)
       {
         fill_halos_vctr_alng(av, ad);
@@ -132,7 +132,7 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a);
       }
-      
+
       void save_edge_vel(const arr_t &) {}
 
       void set_edge_pres(arr_t &, int) {}
@@ -140,17 +140,17 @@ namespace libmpdataxx
 
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const bool ad = false)
       {
-        if(!this->is_cyclic) 
+        if(!this->is_cyclic)
         {
           if(halo == 1)
-            this->recv(av[0],  idx_t(idx_ctor_arg_t(this->rght_halo_vctr))); 
+            this->recv(av[0],  idx_t(idx_ctor_arg_t(this->rght_halo_vctr)));
           else
-            this->xchng(av[0], idx_t(idx_ctor_arg_t(((this->rght_intr_vctr + off)^h)^(-1))), idx_t(idx_ctor_arg_t(this->rght_halo_vctr))); 
+            this->xchng(av[0], idx_t(idx_ctor_arg_t(((this->rght_intr_vctr + off)^h)^(-1))), idx_t(idx_ctor_arg_t(this->rght_halo_vctr)));
         }
         else
-          this->xchng(av[0], idx_t(idx_ctor_arg_t(this->rght_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->rght_halo_vctr))); 
+          this->xchng(av[0], idx_t(idx_ctor_arg_t(this->rght_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->rght_halo_vctr)));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a)
       {
         fill_halos_sclr(a);
@@ -160,27 +160,27 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         // the same logic as fill_halos_vctr_alng but have to consider offset ... TODO: find a way to reuse !
-        if(!this->is_cyclic) 
+        if(!this->is_cyclic)
         {
           if(halo == 1)
-            this->recv(av[0 + offset],  idx_t(idx_ctor_arg_t(this->rght_halo_vctr))); 
+            this->recv(av[0 + offset],  idx_t(idx_ctor_arg_t(this->rght_halo_vctr)));
           else
-            this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(((this->rght_intr_vctr + off)^h)^(-1))), idx_t(idx_ctor_arg_t(this->rght_halo_vctr))); 
+            this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(((this->rght_intr_vctr + off)^h)^(-1))), idx_t(idx_ctor_arg_t(this->rght_halo_vctr)));
         }
         else
-          this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(this->rght_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->rght_halo_vctr))); 
+          this->xchng(av[0 + offset], idx_t(idx_ctor_arg_t(this->rght_intr_vctr + off)), idx_t(idx_ctor_arg_t(this->rght_halo_vctr)));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const real_t)
       {
         fill_halos_vctr_alng(av);
       }
 
-      void fill_halos_vctr_nrml(arr_t &a)                 
-      {                                                                         
-        fill_halos_sclr(a);                                                  
-      }  
-      
+      void fill_halos_vctr_nrml(arr_t &a)
+      {
+        fill_halos_sclr(a);
+      }
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const bool ad = false)
       {
         fill_halos_vctr_alng(av, ad);
diff --git a/libmpdata++/bcond/remote_2d.hpp b/libmpdata++/bcond/remote_2d.hpp
index ca71dd08..6c9f3460 100644
--- a/libmpdata++/bcond/remote_2d.hpp
+++ b/libmpdata++/bcond/remote_2d.hpp
@@ -11,11 +11,11 @@ namespace libmpdataxx
 {
   namespace bcond
   {
-    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>    
+    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
     class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
-        knd == remote && 
-        dir == left   && 
+        knd == remote &&
+        dir == left   &&
         n_dims == 2
       >::type
     > : public detail::remote_common<real_t, halo, dir, n_dims>
@@ -38,7 +38,7 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &) {}
 
       void set_edge_pres(arr_t &, const rng_t &, int) {}
@@ -47,7 +47,7 @@ namespace libmpdataxx
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
       {
         using namespace idxperm;
-        if(!this->is_cyclic) 
+        if(!this->is_cyclic)
         {
           if(halo == 1)
             // send vectors to the left of the domain
@@ -59,7 +59,7 @@ namespace libmpdataxx
         else
           this->xchng(av[0], pi<d>(this->left_intr_vctr + off, j), pi<d>(this->left_halo_vctr, j));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j)
       {
         fill_halos_sclr(a, j);
@@ -69,7 +69,7 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         // the same logic as fill_halos_vctr_alng but have to consider offset ... TODO: find a way to reuse !
-        if(!this->is_cyclic) 
+        if(!this->is_cyclic)
         {
           if(halo == 1)
             // send vectors to the left of the domain
@@ -81,7 +81,7 @@ namespace libmpdataxx
         else
           this->xchng(av[0 + offset], pi<d>(this->left_intr_vctr + off, j), pi<d>(this->left_halo_vctr, j));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const real_t)
       {
         fill_halos_vctr_alng(av, j);
@@ -90,11 +90,11 @@ namespace libmpdataxx
       // TODO: sgs fill_halos
 
       // TODO: move to common? (same in cyclic!)
-      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)                 
-      {                                                                         
-        fill_halos_sclr(a, j);                                                  
-      }  
-      
+      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)
+      {
+        fill_halos_sclr(a, j);
+      }
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, ad);
@@ -134,7 +134,7 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &) {}
 
       void set_edge_pres(arr_t &, const rng_t &, int) {}
@@ -154,7 +154,7 @@ namespace libmpdataxx
         else
           this->xchng(av[0], pi<d>(this->rght_intr_vctr + off, j), pi<d>(this->rght_halo_vctr, j));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j)
       {
         fill_halos_sclr(a, j);
@@ -176,18 +176,18 @@ namespace libmpdataxx
         else
           this->xchng(av[0 + offset], pi<d>(this->rght_intr_vctr + off, j), pi<d>(this->rght_halo_vctr, j));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const real_t)
       {
         fill_halos_vctr_alng(av, j);
       }
 
       // TODO: move to common? (same in cyclic!)
-      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)                 
-      {                                                                         
-        fill_halos_sclr(a, j);                                                  
-      }  
-      
+      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)
+      {
+        fill_halos_sclr(a, j);
+      }
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, ad);
diff --git a/libmpdata++/bcond/remote_3d.hpp b/libmpdata++/bcond/remote_3d.hpp
index f14951fc..cbac8bb5 100644
--- a/libmpdata++/bcond/remote_3d.hpp
+++ b/libmpdata++/bcond/remote_3d.hpp
@@ -11,11 +11,11 @@ namespace libmpdataxx
 {
   namespace bcond
   {
-    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>    
+    template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
     class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
-        knd == remote && 
-        dir == left   && 
+        knd == remote &&
+        dir == left   &&
         n_dims == 3
       >::type
     > : public detail::remote_common<real_t, halo, dir, n_dims>
@@ -39,9 +39,9 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
-      
+
       void set_edge_pres(arr_t &, const rng_t &, const rng_t &, int) {}
 
 
@@ -59,12 +59,12 @@ namespace libmpdataxx
         else
           this->xchng(av[0], pi<d>(this->left_intr_vctr + off, j, k), pi<d>(this->left_halo_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j, const rng_t &k)
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_sgs_vctr(arrvec_t<arr_t> &av, const arr_t &, const rng_t &j, const rng_t &k, const int offset = 0)
       {
         using namespace idxperm;
@@ -80,18 +80,18 @@ namespace libmpdataxx
         else
           this->xchng(av[0 + offset], pi<d>(this->left_intr_vctr + off, j, k), pi<d>(this->left_halo_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const rng_t &k, const real_t)
       {
         fill_halos_vctr_alng(av, j, k);
       }
 
       // TODO: move to common? (same in cyclic!)
-      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j, const rng_t &k)                 
-      {                                                                         
-        fill_halos_sclr(a, j, k);                                                  
-      }  
-      
+      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j, const rng_t &k)
+      {
+        fill_halos_sclr(a, j, k);
+      }
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, k, ad);
@@ -131,9 +131,9 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
-      
+
       void set_edge_pres(arr_t &, const rng_t &, const rng_t &, int) {}
 
 
@@ -143,19 +143,19 @@ namespace libmpdataxx
         if(!this->is_cyclic)
         {
           if(halo == 1)
-            this->recv(av[0], pi<d>(this->rght_halo_vctr, j, k)); 
+            this->recv(av[0], pi<d>(this->rght_halo_vctr, j, k));
           else
             this->xchng(av[0], pi<d>(((this->rght_intr_vctr + off)^h)^(-1), j, k), pi<d>(this->rght_halo_vctr, j, k));
         }
         else
           this->xchng(av[0], pi<d>(this->rght_intr_vctr + off, j, k), pi<d>(this->rght_halo_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_div(arr_t &a, const rng_t &j, const rng_t &k)
       {
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_sgs_vctr(arrvec_t<arr_t> &av, const arr_t &, const rng_t &j, const rng_t &k, const int offset = 0)
       {
         using namespace idxperm;
@@ -170,18 +170,18 @@ namespace libmpdataxx
         else
           this->xchng(av[0 + offset], pi<d>(this->rght_intr_vctr + off, j, k), pi<d>(this->rght_halo_vctr, j, k));
       }
-      
+
       void fill_halos_sgs_tnsr(arrvec_t<arr_t> &av, const arr_t &, const arr_t &, const rng_t &j, const rng_t &k, const real_t)
       {
         fill_halos_vctr_alng(av, j, k);
       }
 
       // TODO: move to common? (same in cyclic!)
-      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j, const rng_t &k)                 
-      {                                                                         
-        fill_halos_sclr(a, j, k);                                                  
+      void fill_halos_vctr_nrml(arr_t &a, const rng_t &j, const rng_t &k)
+      {
+        fill_halos_sclr(a, j, k);
       }
-       
+
       void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
       {
         fill_halos_vctr_alng(av, j, k, ad);
diff --git a/libmpdata++/bcond/rigid_2d.hpp b/libmpdata++/bcond/rigid_2d.hpp
index 998eb6c1..c1867a7b 100644
--- a/libmpdata++/bcond/rigid_2d.hpp
+++ b/libmpdata++/bcond/rigid_2d.hpp
@@ -12,20 +12,20 @@ namespace libmpdataxx
   namespace bcond
   {
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == rigid &&
         dir == left &&
         n_dims == 2
       >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, n_dims>;
       using parent_t::parent_t; // inheriting ctor
 
       public:
-      
+
       void fill_halos_sclr(arr_t &a, const rng_t &j, const bool deriv = false)
       {
         using namespace idxperm;
@@ -35,7 +35,7 @@ namespace libmpdataxx
           a(pi<d>(i, j)) = a(pi<d>(this->left_edge_sclr + n, j));
         }
       }
- 
+
       void fill_halos_pres(arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
@@ -45,10 +45,10 @@ namespace libmpdataxx
           a(pi<d>(i, j)) = 2 * a(pi<d>(this->left_edge_sclr,     j))
                              - a(pi<d>(this->left_edge_sclr + n, j));
         }
-      }     
+      }
 
       void save_edge_vel(const arr_t &, const rng_t &) {}
-     
+
       void set_edge_pres(arr_t &a, const rng_t &j, int)
       {
         using namespace idxperm;
@@ -70,7 +70,7 @@ namespace libmpdataxx
         // note intentional sclr
         fill_halos_sclr(a, j);
       }
-      
+
       void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j)
       {
         using namespace idxperm;
@@ -103,7 +103,7 @@ namespace libmpdataxx
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == rigid &&
         dir == rght &&
@@ -114,9 +114,9 @@ namespace libmpdataxx
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, n_dims>;
       using parent_t::parent_t; // inheriting ctor
-      
+
       public:
-      
+
       void fill_halos_sclr(arr_t &a, const rng_t &j, const bool deriv = false)
       {
         // zero flux condition
@@ -126,7 +126,7 @@ namespace libmpdataxx
           a(pi<d>(i, j)) = a(pi<d>(this->rght_edge_sclr - n, j)); // zero gradient for scalar gradient
         }
       }
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
@@ -137,9 +137,9 @@ namespace libmpdataxx
                              - a(pi<d>(this->rght_edge_sclr - n, j));
         }
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &) {}
-      
+
       void set_edge_pres(arr_t &a, const rng_t &j, int)
       {
         using namespace idxperm;
@@ -155,13 +155,13 @@ namespace libmpdataxx
           av[d](pi<d>(i, j)) = -av[d](pi<d>(this->rght_edge_sclr - n + h, j));
         }
       }
-      
+
       void fill_halos_vctr_nrml(arr_t &a, const rng_t &j)
       {
         // note intentional sclr
         fill_halos_sclr(a, j);
       }
-      
+
       void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j)
       {
         using namespace idxperm;
diff --git a/libmpdata++/bcond/rigid_3d.hpp b/libmpdata++/bcond/rigid_3d.hpp
index 0fca941c..2c683a68 100644
--- a/libmpdata++/bcond/rigid_3d.hpp
+++ b/libmpdata++/bcond/rigid_3d.hpp
@@ -12,20 +12,20 @@ namespace libmpdataxx
   namespace bcond
   {
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == rigid &&
         dir == left &&
         n_dims == 3
       >::type
     > : public detail::bcond_common<real_t, halo, n_dims>
-    { 
+    {
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, n_dims>;
       using parent_t::parent_t; // inheriting ctor
 
       public:
-      
+
       void fill_halos_sclr(arr_t &a, const rng_t &j, const rng_t &k, const bool deriv = false)
       {
         using namespace idxperm;
@@ -35,7 +35,7 @@ namespace libmpdataxx
           a(pi<d>(i, j, k)) = a(pi<d>(this->left_edge_sclr + n, j, k));
         }
       }
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
@@ -46,9 +46,9 @@ namespace libmpdataxx
                                 - a(pi<d>(this->left_edge_sclr + n, j, k));
         }
       }
-      
+
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
-      
+
       void set_edge_pres(arr_t &a, const rng_t &j, const rng_t &k, int)
       {
         using namespace idxperm;
@@ -70,7 +70,7 @@ namespace libmpdataxx
         // note intentional sclr
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
@@ -104,7 +104,7 @@ namespace libmpdataxx
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
-    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,  
+    class bcond<       real_t,     halo,         knd,         dir,     n_dims,     d,
       typename std::enable_if<
         knd == rigid &&
         dir == rght &&
@@ -115,9 +115,9 @@ namespace libmpdataxx
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
       using arr_t = blitz::Array<real_t, n_dims>;
       using parent_t::parent_t; // inheriting ctor
-      
+
       public:
-      
+
       void fill_halos_sclr(arr_t &a, const rng_t &j, const rng_t &k, const bool deriv = false)
       {
         // zero flux condition
@@ -127,10 +127,10 @@ namespace libmpdataxx
           a(pi<d>(i, j, k)) = a(pi<d>(this->rght_edge_sclr - n, j, k)); // zero gradient for scalar gradient
         }
       }
-      
-      
+
+
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
-      
+
       void fill_halos_pres(arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
@@ -141,7 +141,7 @@ namespace libmpdataxx
                                 - a(pi<d>(this->rght_edge_sclr - n, j, k));
         }
       }
-      
+
       void set_edge_pres(arr_t &a, const rng_t &j, const rng_t &k, int)
       {
         using namespace idxperm;
@@ -157,13 +157,13 @@ namespace libmpdataxx
           av[d](pi<d>(i, j, k)) = -av[d](pi<d>(this->rght_edge_sclr - n + h, j, k));
         }
       }
-      
+
       void fill_halos_vctr_nrml(arr_t &a, const rng_t &j, const rng_t &k)
       {
         // note intentional sclr
         fill_halos_sclr(a, j, k);
       }
-      
+
       void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
diff --git a/libmpdata++/bcond/shared.hpp b/libmpdata++/bcond/shared.hpp
index f41f2bc2..c677d077 100644
--- a/libmpdata++/bcond/shared.hpp
+++ b/libmpdata++/bcond/shared.hpp
@@ -15,9 +15,9 @@ namespace libmpdataxx
     class shared : public detail::bcond_common<real_t, halo, n_dims>
     {
       public:
-      
+
       using parent_t = detail::bcond_common<real_t, halo, n_dims>;
-      
+
       using arr_1d_t = typename parent_t::arr_1d_t;
       using arr_2d_t = typename parent_t::arr_2d_t;
       using arr_3d_t = typename parent_t::arr_3d_t;
@@ -28,7 +28,7 @@ namespace libmpdataxx
 
       virtual void fill_halos_pres(arr_2d_t &, const rng_t &) { };
       virtual void fill_halos_pres(arr_3d_t &, const rng_t &, const rng_t &) { };
-      
+
       virtual void save_edge_vel(const arr_2d_t &, const rng_t &) { };
       virtual void save_edge_vel(const arr_3d_t &, const rng_t &, const rng_t &) { };
 
@@ -37,17 +37,17 @@ namespace libmpdataxx
 
       virtual void fill_halos_vctr_alng(arrvec_t<arr_1d_t> &, const bool) { };
       virtual void fill_halos_vctr_alng(arrvec_t<arr_2d_t> &, const rng_t &, const bool) { };
-      virtual void fill_halos_vctr_alng(arrvec_t<arr_3d_t> &, const rng_t &, const rng_t &, const bool) { }; 
-      
+      virtual void fill_halos_vctr_alng(arrvec_t<arr_3d_t> &, const rng_t &, const rng_t &, const bool) { };
+
       virtual void fill_halos_vctr_nrml(arr_2d_t &, const rng_t &) { };
       virtual void fill_halos_vctr_nrml(arr_3d_t &, const rng_t &, const rng_t &) { };
-      
+
       virtual void fill_halos_sgs_div(arr_2d_t &, const rng_t &) { };
       virtual void fill_halos_sgs_div(arr_3d_t &, const rng_t &, const rng_t &) { };
-      
+
       virtual void fill_halos_sgs_vctr(arrvec_t<arr_2d_t> &, const arr_2d_t &, const rng_t &, const int offset = 0) { };
       virtual void fill_halos_sgs_vctr(arrvec_t<arr_3d_t> &, const arr_3d_t &, const rng_t &, const rng_t &, const int offset = 0) { };
-      
+
       virtual void fill_halos_sgs_tnsr(arrvec_t<arr_2d_t> &, const arr_2d_t &, const arr_2d_t &, const rng_t &, const real_t) { };
       virtual void fill_halos_sgs_tnsr(arrvec_t<arr_3d_t> &, const arr_3d_t &, const arr_3d_t &,
                                                              const rng_t &, const rng_t &, const real_t) { };
diff --git a/libmpdata++/blitz.hpp b/libmpdata++/blitz.hpp
index 96a05f09..606b0996 100644
--- a/libmpdata++/blitz.hpp
+++ b/libmpdata++/blitz.hpp
@@ -18,7 +18,7 @@
 
 // force use of #pragma ivdep even if Blitz thinks the compiler does not support it
 // (as of gcc 20140212, it gives an ICE: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60198) - TODO: check in CMake
-//#define BZ_USE_ALIGNMENT_PRAGMAS  
+//#define BZ_USE_ALIGNMENT_PRAGMAS
 
 //#if defined(USE_MPI)
 //#  define BZ_HAVE_BOOST_SERIALIZATION
@@ -27,7 +27,7 @@
 #include <blitz/tv2fastiter.h> // otherwise Clang fails in debug mode
 #include <blitz/array.h>
 
-  
+
 #include <libmpdata++/kahan_reduction.hpp>
 
 //////////////////////////////////////////////////////////
@@ -46,7 +46,7 @@
 {                                        \
   init                                   \
   return safeToReturn(expr);             \
-} 
+}
 
 namespace libmpdataxx
 {
@@ -73,31 +73,31 @@ namespace libmpdataxx
   {
     using type = typename expr_t::T_numtype;
   };
-  
+
   template <class expr_t>
   struct real_t_helper<int, expr_t>
   {
     using type = expr_t;
   };
 
-  // Boost ptr_vector 
+  // Boost ptr_vector
   template <class arr_t>
-  struct arrvec_t : boost::ptr_vector<arr_t> 
+  struct arrvec_t : boost::ptr_vector<arr_t>
   {
     using parent_t = boost::ptr_vector<arr_t>;
 
-    const arr_t &operator[](const int i) const 
-    {   
+    const arr_t &operator[](const int i) const
+    {
       return this->at(
         (i + this->size()) % this->size()
-      );  
+      );
     }
-    
+
     arr_t &operator[](const int i)
-    {   
+    {
       return this->at(
         (i + this->size()) % this->size()
-      );  
+      );
     }
 
     void push_back(arr_t *arr)
diff --git a/libmpdata++/concurr/any.hpp b/libmpdata++/concurr/any.hpp
index 8d8e9d9d..3285ff4f 100644
--- a/libmpdata++/concurr/any.hpp
+++ b/libmpdata++/concurr/any.hpp
@@ -15,57 +15,57 @@ namespace libmpdataxx
     template <typename real_t, int n_dims, typename advance_arg_t = int>
     struct any
     {
-      virtual 
-      void advance(advance_arg_t) 
-      { assert(false); throw; }  
+      virtual
+      void advance(advance_arg_t)
+      { assert(false); throw; }
 
-      virtual 
+      virtual
       blitz::Array<real_t, n_dims> advectee(int eqn = 0)
       { assert(false); throw; }
 
-      virtual 
+      virtual
       const blitz::Array<real_t, n_dims> advectee_global(int eqn = 0)
       { assert(false); throw; }
 
-      virtual 
+      virtual
       void advectee_global_set(const blitz::Array<real_t, n_dims>, int eqn = 0)
       { assert(false); throw; }
 
-      virtual 
-      blitz::Array<real_t, n_dims> advector(int dim = 0) 
+      virtual
+      blitz::Array<real_t, n_dims> advector(int dim = 0)
       { assert(false); throw; }
 
-      virtual 
-      blitz::Array<real_t, n_dims> g_factor() 
+      virtual
+      blitz::Array<real_t, n_dims> g_factor()
       { assert(false); throw; }
-     
-      virtual 
-      blitz::Array<real_t, n_dims> vab_coefficient() 
+
+      virtual
+      blitz::Array<real_t, n_dims> vab_coefficient()
       { assert(false); throw; }
-      
-      virtual 
-      blitz::Array<real_t, n_dims> vab_relaxed_state(int d = 0) 
+
+      virtual
+      blitz::Array<real_t, n_dims> vab_relaxed_state(int d = 0)
       { assert(false); throw; }
-      
-      virtual 
+
+      virtual
       blitz::Array<real_t, n_dims> sclr_array(const std::string &name, int n = 0)
       { assert(false); throw; }
 
-      virtual 
-      bool *panic_ptr() 
+      virtual
+      bool *panic_ptr()
       { assert(false && "unimplemented!"); throw; }
-      
-      virtual 
+
+      virtual
       const real_t time() const
       { assert(false); throw; }
-      
+
       // minimum of an advectee, mpi-aware
-      virtual 
+      virtual
       const real_t min(int eqn = 0) const
       { assert(false); throw; }
-      
+
       // maximum of an advectee, mpi-aware
-      virtual 
+      virtual
       const real_t max(int eqn = 0) const
       { assert(false); throw; }
 
diff --git a/libmpdata++/concurr/boost_thread.hpp b/libmpdata++/concurr/boost_thread.hpp
index e14a6126..83675a7c 100644
--- a/libmpdata++/concurr/boost_thread.hpp
+++ b/libmpdata++/concurr/boost_thread.hpp
@@ -29,14 +29,14 @@ namespace libmpdataxx
     class boost_thread : public detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>
     {
       using parent_t = detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>;
- 
-      class mem_t : public parent_t::mem_t 
+
+      class mem_t : public parent_t::mem_t
       {
         boost::barrier b;
 
         public:
 
-        static int size(const unsigned max_threads = std::numeric_limits<unsigned>::max()) 
+        static int size(const unsigned max_threads = std::numeric_limits<unsigned>::max())
         {
           const char *env_var("OMP_NUM_THREADS");
 
@@ -53,8 +53,8 @@ namespace libmpdataxx
         // ctor
         mem_t(const std::array<int, solver_t::n_dims> &grid_size) :
           b(size(grid_size[0])),
-          parent_t::mem_t(grid_size, size(grid_size[0])) 
-        {}; 
+          parent_t::mem_t(grid_size, size(grid_size[0]))
+        {};
 
         void barrier()
         {
@@ -68,8 +68,8 @@ namespace libmpdataxx
       void solve(typename parent_t::advance_arg_t nt)
       {
         boost::thread_group threads;
-        for (int i = 0; i < this->algos.size(); ++i) 
-        {  
+        for (int i = 0; i < this->algos.size(); ++i)
+        {
           std::unique_ptr<boost::thread> thp;
           thp.reset(new boost::thread(
             &solver_t::solve, boost::ref(this->algos[i]), nt
@@ -80,7 +80,7 @@ namespace libmpdataxx
       }
 
       // ctor
-      boost_thread(const typename solver_t::rt_params_t &p) : 
+      boost_thread(const typename solver_t::rt_params_t &p) :
         parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[0]))
       {}
 
diff --git a/libmpdata++/concurr/cxx11_thread.hpp b/libmpdata++/concurr/cxx11_thread.hpp
index 6324e319..b89a556e 100644
--- a/libmpdata++/concurr/cxx11_thread.hpp
+++ b/libmpdata++/concurr/cxx11_thread.hpp
@@ -33,10 +33,10 @@ namespace libmpdataxx
 
         public:
 
-        explicit barrier(const std::size_t count) : 
-          m_count(count), 
+        explicit barrier(const std::size_t count) :
+          m_count(count),
           m_threshold(count),
-          m_generation(0) 
+          m_generation(0)
         { }
 
         bool wait()
@@ -71,14 +71,14 @@ namespace libmpdataxx
     class cxx11_thread : public detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>
     {
       using parent_t = detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>;
- 
-      class mem_t : public parent_t::mem_t 
+
+      class mem_t : public parent_t::mem_t
       {
         detail::barrier b;
 
         public:
 
-        static int size(const unsigned max_threads = std::numeric_limits<unsigned>::max()) 
+        static int size(const unsigned max_threads = std::numeric_limits<unsigned>::max())
         {
           const char *env_var("OMP_NUM_THREADS");
 
@@ -94,8 +94,8 @@ namespace libmpdataxx
         // ctor
         mem_t(const std::array<int, solver_t::n_dims> &grid_size) :
           b(size(grid_size[0])),
-          parent_t::mem_t(grid_size, size(grid_size[0])) 
-        {}; 
+          parent_t::mem_t(grid_size, size(grid_size[0]))
+        {};
 
         void barrier()
         {
@@ -108,8 +108,8 @@ namespace libmpdataxx
       void solve(typename parent_t::advance_arg_t nt)
       {
         boost::ptr_vector<std::thread> threads(mem_t::size());
-        for (int i = 0; i < this->algos.size(); ++i) 
-        {  
+        for (int i = 0; i < this->algos.size(); ++i)
+        {
           threads.push_back(new std::thread(
             &solver_t::solve, &(this->algos[i]), nt
           ));
@@ -118,7 +118,7 @@ namespace libmpdataxx
       }
 
       // ctor
-      cxx11_thread(const typename solver_t::rt_params_t &p) : 
+      cxx11_thread(const typename solver_t::rt_params_t &p) :
         parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[0]))
       {}
 
diff --git a/libmpdata++/concurr/detail/concurr_common.hpp b/libmpdata++/concurr/detail/concurr_common.hpp
index 65fd3c02..691e4ff0 100644
--- a/libmpdata++/concurr/detail/concurr_common.hpp
+++ b/libmpdata++/concurr/detail/concurr_common.hpp
@@ -41,7 +41,7 @@ namespace libmpdataxx
     namespace detail
     {
       template<
-        class solver_t_, 
+        class solver_t_,
         bcond::bcond_e bcxl, bcond::bcond_e bcxr,
         bcond::bcond_e bcyl, bcond::bcond_e bcyr,
         bcond::bcond_e bczl, bcond::bcond_e bczr
@@ -51,17 +51,17 @@ namespace libmpdataxx
         public:
 
         typedef solver_t_ solver_t;
-        
+
         static_assert(
           (solver_t::n_dims == 3) ||
-          (solver_t::n_dims == 2 
-            && bczl == bcond::null 
+          (solver_t::n_dims == 2
+            && bczl == bcond::null
             && bczr == bcond::null
           ) ||
-          (solver_t::n_dims == 1 
-            && bczl == bcond::null 
+          (solver_t::n_dims == 1
+            && bczl == bcond::null
             && bczr == bcond::null
-            && bcyl == bcond::null 
+            && bcyl == bcond::null
             && bcyr == bcond::null
           )
           ,
@@ -78,7 +78,7 @@ namespace libmpdataxx
         > mem_t;
 
         // member fields
-        boost::ptr_vector<solver_t> algos; 
+        boost::ptr_vector<solver_t> algos;
         std::unique_ptr<mem_t> mem;
         timer tmr;
 
@@ -104,11 +104,11 @@ namespace libmpdataxx
           solver_t::alloc(mem.get(), p.n_iters);
 
           // allocate per-thread structures
-          init(p, mem->grid_size, size); 
+          init(p, mem->grid_size, size);
         }
 
         private:
- 
+
         template <
           bcond::bcond_e type,
           bcond::drctn_e dir,
@@ -116,7 +116,7 @@ namespace libmpdataxx
         >
         void bc_set(
           typename solver_t::bcp_t &bcp
-        ) 
+        )
         {
           // sanity check - polar coords do not work with MPI yet
           if (type == bcond::polar && mem->distmem.size() > 1)
@@ -131,7 +131,7 @@ namespace libmpdataxx
               ||
               (dir == bcond::rght && mem->distmem.rank() != mem->distmem.size() - 1)
               // cyclic condition for distmem domain (note: will not work if a non-cyclic condition is on the other end)
-              || 
+              ||
               (type == bcond::cyclic)
             ) return bc_set<bcond::remote, dir, dim>(bcp);
           }
@@ -139,7 +139,7 @@ namespace libmpdataxx
           // bc allocation, all mpi routines called by the remote bcnd ctor are thread-safe (?)
           bcp.reset(
             new bcond::bcond<real_t, solver_t::halo, type, dir, solver_t::n_dims, dim>(
-              mem->slab(mem->grid_size[dim]), 
+              mem->slab(mem->grid_size[dim]),
               mem->distmem.grid_size
             )
           );
@@ -156,7 +156,7 @@ namespace libmpdataxx
           bc_set<bcxl, bcond::left, 0>(bxl);
           bc_set<bcxr, bcond::rght, 0>(bxr);
 
-          for (int i0 = 0; i0 < n0; ++i0) 
+          for (int i0 = 0; i0 < n0; ++i0)
           {
             shrdl.reset(new bcond::shared<real_t, solver_t::halo, solver_t::n_dims>());
             shrdr.reset(new bcond::shared<real_t, solver_t::halo, solver_t::n_dims>());
@@ -165,11 +165,11 @@ namespace libmpdataxx
               new solver_t(
                 typename solver_t::ctor_args_t({
                   i0,
-                  mem.get(), 
+                  mem.get(),
                   i0 == 0      ? bxl : shrdl,
                   i0 == n0 - 1 ? bxr : shrdr,
                   mem->slab(grid_size[0], i0, n0)
-                }), 
+                }),
                 p
               )
             );
@@ -180,12 +180,12 @@ namespace libmpdataxx
         // TODO: assert parallelisation in the right dimensions! (blitz::assertContiguous)
         void init(
           const typename solver_t::rt_params_t &p,
-          const std::array<rng_t, 2> &grid_size, 
+          const std::array<rng_t, 2> &grid_size,
           const int &n0, const int &n1 = 1
         ) {
-          for (int i0 = 0; i0 < n0; ++i0) 
+          for (int i0 = 0; i0 < n0; ++i0)
           {
-            for (int i1 = 0; i1 < n1; ++i1) 
+            for (int i1 = 0; i1 < n1; ++i1)
             {
               typename solver_t::bcp_t bxl, bxr, byl, byr, shrdl, shrdr;
 
@@ -202,13 +202,13 @@ namespace libmpdataxx
                 new solver_t(
                   typename solver_t::ctor_args_t({
                     i0,
-                    mem.get(), 
+                    mem.get(),
                     i0 == 0      ? bxl : shrdl,
                     i0 == n0 - 1 ? bxr : shrdr,
-                    byl, byr, 
-                    mem->slab(grid_size[0], i0, n0),  
+                    byl, byr,
+                    mem->slab(grid_size[0], i0, n0),
                     mem->slab(grid_size[1], i1, n1)
-                  }), 
+                  }),
                   p
                 )
               );
@@ -219,17 +219,17 @@ namespace libmpdataxx
         // 3D version
         void init(
           const typename solver_t::rt_params_t &p,
-          const std::array<rng_t, 3> &grid_size, 
+          const std::array<rng_t, 3> &grid_size,
           const int &n0, const int &n1 = 1, const int &n2 = 1
         ) {
           typename solver_t::bcp_t bxl, bxr, byl, byr, bzl, bzr, shrdl, shrdr;
 
           // TODO: renew pointers only if invalid ?
-          for (int i0 = 0; i0 < n0; ++i0) 
+          for (int i0 = 0; i0 < n0; ++i0)
           {
-            for (int i1 = 0; i1 < n1; ++i1) 
+            for (int i1 = 0; i1 < n1; ++i1)
             {
-              for (int i2 = 0; i2 < n2; ++i2) 
+              for (int i2 = 0; i2 < n2; ++i2)
               {
                 bc_set<bcxl, bcond::left, 0>(bxl);
                 bc_set<bcxr, bcond::rght, 0>(bxr);
@@ -247,15 +247,15 @@ namespace libmpdataxx
                   new solver_t(
                     typename solver_t::ctor_args_t({
                       i0,
-                      mem.get(), 
+                      mem.get(),
                       i0 == 0      ? bxl : shrdl,
                       i0 == n0 - 1 ? bxr : shrdr,
-                      byl, byr, 
-                      bzl, bzr, 
-                      mem->slab(grid_size[0], i0, n0), 
-                      mem->slab(grid_size[1], i1, n1), 
+                      byl, byr,
+                      bzl, bzr,
+                      mem->slab(grid_size[0], i0, n0),
+                      mem->slab(grid_size[1], i1, n1),
                       mem->slab(grid_size[2], i2, n2)
-                    }), 
+                    }),
                     p
                   )
                 );
@@ -267,13 +267,13 @@ namespace libmpdataxx
         virtual void solve(advance_arg_t nt) = 0;
 
         public:
-    
+
         void advance(advance_arg_t nt) final
-        {   
+        {
           tmr.resume();
           solve(nt);
           tmr.stop();
-        }  
+        }
 
         typename solver_t::arr_t advectee(int e = 0) final
         {
@@ -312,12 +312,12 @@ namespace libmpdataxx
         {
           return mem->vab_coefficient();
         }
-        
+
         typename solver_t::arr_t vab_relaxed_state(int d = 0) final
         {
           return mem->vab_relaxed_state(d);
         }
-        
+
         typename solver_t::arr_t sclr_array(const std::string &name, int n = 0) final
         {
           return mem->sclr_array(name, n);
diff --git a/libmpdata++/concurr/detail/distmem.hpp b/libmpdata++/concurr/detail/distmem.hpp
index ace2aa87..a73b8f5c 100644
--- a/libmpdata++/concurr/detail/distmem.hpp
+++ b/libmpdata++/concurr/detail/distmem.hpp
@@ -49,8 +49,8 @@ namespace libmpdataxx
 
         std::array<int, n_dims> grid_size;
 
-        int rank() 
-        { 
+        int rank()
+        {
 #if defined(USE_MPI)
           return mpicom.rank();  // is it thread-safe? TODO: init once in ctor?
 #else
@@ -58,7 +58,7 @@ namespace libmpdataxx
 #endif
         }
 
-        int size() 
+        int size()
         {
 #if defined(USE_MPI)
           return mpicom.size();   // is it thread-safe? TODO: init once in ctor?
@@ -102,8 +102,8 @@ namespace libmpdataxx
         }
 
         // ctor
-        distmem(const std::array<int, n_dims> &grid_size) 
-          : grid_size(grid_size) 
+        distmem(const std::array<int, n_dims> &grid_size)
+          : grid_size(grid_size)
         {
 #if !defined(USE_MPI)
           if (
diff --git a/libmpdata++/concurr/detail/sharedmem.hpp b/libmpdata++/concurr/detail/sharedmem.hpp
index 926881f0..cf927d6a 100644
--- a/libmpdata++/concurr/detail/sharedmem.hpp
+++ b/libmpdata++/concurr/detail/sharedmem.hpp
@@ -26,15 +26,15 @@ namespace libmpdataxx
         typename real_t,
         int n_dims,
         int n_tlev
-      >  
+      >
       class sharedmem_common
       {
-        using arr_t = blitz::Array<real_t, n_dims>; 
+        using arr_t = blitz::Array<real_t, n_dims>;
 
         static_assert(n_dims > 0, "n_dims <= 0");
         static_assert(n_tlev > 0, "n_tlev <= 0");
 
-        std::unique_ptr<blitz::Array<real_t, 1>> xtmtmp; 
+        std::unique_ptr<blitz::Array<real_t, 1>> xtmtmp;
         std::unique_ptr<blitz::Array<double, 1>> sumtmp;
 
         protected:
@@ -45,7 +45,7 @@ namespace libmpdataxx
 
         int n = 0;
         const int size;
-        std::array<rng_t, n_dims> grid_size; 
+        std::array<rng_t, n_dims> grid_size;
         bool panic = false; // for multi-threaded SIGTERM handling
 
         detail::distmem<real_t, n_dims> distmem;
@@ -58,13 +58,13 @@ namespace libmpdataxx
         arrvec_t<arr_t> vab_relax; // velocity absorber relaxed state
         arrvec_t<arr_t> khn_tmp; // Kahan sum for donor-cell
 
-        std::unordered_map< 
+        std::unordered_map<
           const char*, // intended for addressing with __FILE__
           boost::ptr_vector<arrvec_t<arr_t>>
         > tmp;
-        
+
         // list of temporary fields that can be accessed from outside of concurr
-        std::unordered_map< 
+        std::unordered_map<
           std::string,
           std::pair<const char*, int>
         > avail_tmp;
@@ -73,7 +73,7 @@ namespace libmpdataxx
         {
           assert(false && "sharedmem_common::barrier() called!");
         }
-     
+
         void cycle(const int &rank)
         {
           barrier();
@@ -86,7 +86,7 @@ namespace libmpdataxx
         sharedmem_common(const std::array<int, n_dims> &grid_size, const int &size)
           : n(0), distmem(grid_size), size(size) // TODO: is n(0) needed?
         {
-          for (int d = 0; d < n_dims; ++d) 
+          for (int d = 0; d < n_dims; ++d)
           {
             this->grid_size[d] = slab(
               rng_t(0, grid_size[d]-1),
@@ -100,10 +100,10 @@ namespace libmpdataxx
           oss << "grid_size[0]: " << this->grid_size[0] << " origin[0]: " << origin[0] << std::endl;
           std::cerr << oss.str() << std::endl;
 
-          if (size > grid_size[0]) 
+          if (size > grid_size[0])
             throw std::runtime_error("number of subdomains greater than number of gridpoints");
 
-          if (n_dims != 1) 
+          if (n_dims != 1)
             sumtmp.reset(new blitz::Array<double, 1>(this->grid_size[0]));
           xtmtmp.reset(new blitz::Array<real_t, 1>(size));
         }
@@ -111,7 +111,7 @@ namespace libmpdataxx
         /// @brief concurrency-aware summation of array elements
         double sum(const int &rank, const arr_t &arr, const idx_t<n_dims> &ijk, const bool sum_khn)
         {
-          // doing a two-step sum to reduce numerical error 
+          // doing a two-step sum to reduce numerical error
           // and make parallel results reproducible
           for (int c = ijk[0].first(); c <= ijk[0].last(); ++c) // TODO: optimise for i.count() == 1
           {
@@ -134,7 +134,7 @@ namespace libmpdataxx
           barrier();
           return result;
 #else
-          if(rank == 0) 
+          if(rank == 0)
           {
             // master thread calculates the sum from this process, stores in shared array
             if (sum_khn)
@@ -154,7 +154,7 @@ namespace libmpdataxx
         /// @brief concurrency-aware summation of a (element-wise) product of two arrays
         double sum(const int &rank, const arr_t &arr1, const arr_t &arr2, const idx_t<n_dims> &ijk, const bool sum_khn)
         {
-          // doing a two-step sum to reduce numerical error 
+          // doing a two-step sum to reduce numerical error
           // and make parallel results reproducible
           for (int c = ijk[0].first(); c <= ijk[0].last(); ++c)
           {
@@ -165,7 +165,7 @@ namespace libmpdataxx
             if (sum_khn)
               (*sumtmp)(c) = blitz::kahan_sum(arr1(slice_idx) * arr2(slice_idx));
             else
-              (*sumtmp)(c) = blitz::sum(arr1(slice_idx) * arr2(slice_idx)); 
+              (*sumtmp)(c) = blitz::sum(arr1(slice_idx) * arr2(slice_idx));
           }
           // TODO: code below same as in the function above
           barrier(); // wait for all threads to calc their part
@@ -178,7 +178,7 @@ namespace libmpdataxx
           barrier();
           return result;
 #else
-          if(rank == 0) 
+          if(rank == 0)
           {
             // master thread calculates the sum from this process, stores in shared array
             if (sum_khn)
@@ -198,7 +198,7 @@ namespace libmpdataxx
         real_t min(const int &rank, const arr_t &arr)
         {
           // min across local threads
-          (*xtmtmp)(rank) = blitz::min(arr); 
+          (*xtmtmp)(rank) = blitz::min(arr);
           barrier();
 #if !defined(USE_MPI)
           real_t result = blitz::min(*xtmtmp);
@@ -222,7 +222,7 @@ namespace libmpdataxx
         real_t max(const int &rank, const arr_t &arr)
         {
           // max across local threads
-          (*xtmtmp)(rank) = blitz::max(arr); 
+          (*xtmtmp)(rank) = blitz::max(arr);
           barrier();
 #if !defined(USE_MPI)
           real_t result = blitz::max(*xtmtmp);
@@ -265,7 +265,7 @@ namespace libmpdataxx
         // and hence to not use BZ_THREADSAFE
         private:
         boost::ptr_vector<arr_t> tobefreed;
-        
+
         public:
         arr_t *never_delete(arr_t *arg)
         {
@@ -283,21 +283,21 @@ namespace libmpdataxx
 
         private:
         // helper methods to define subdomain ranges
-        static int min(const int &span, const int &rank, const int &size) 
+        static int min(const int &span, const int &rank, const int &size)
         {
-          return rank * span / size; 
+          return rank * span / size;
         }
 
-        static int max(const int &span, const int &rank, const int &size) 
+        static int max(const int &span, const int &rank, const int &size)
         {
-          return min(span, rank + 1, size) - 1;  
+          return min(span, rank + 1, size) - 1;
         }
 
         public:
         static rng_t slab(
           const rng_t &span,
-          const int &rank = 0,  
-          const int &size = 1 
+          const int &rank = 0,
+          const int &size = 1
         ) {
           return rng_t(
             span.first() + min(span.length(), rank, size),
@@ -308,7 +308,7 @@ namespace libmpdataxx
         virtual arr_t advectee(int e = 0) = 0;
 
         void advectee_global_set(const arr_t arr, int e = 0)
-        {   
+        {
 #if defined(USE_MPI)
           if(this->distmem.size() > 1)
           {
@@ -317,7 +317,7 @@ namespace libmpdataxx
           else
 #endif
           advectee(e) = arr;
-        }  
+        }
 
         protected:
 
@@ -353,7 +353,7 @@ namespace libmpdataxx
         }
 
         const blitz::Array<real_t, 1> advectee_global(int e = 0)
-        {   
+        {
 #if defined(USE_MPI)
           if(this->distmem.size() > 1)
           {
@@ -366,7 +366,7 @@ namespace libmpdataxx
             for(auto &size : sizes) { size = this->slab(rng_t(0, this->distmem.grid_size[0]-1), size, this->distmem.size()).length();}
             // calc displacement
             std::vector<int> displ(sizes.size());
-            std::partial_sum(sizes.begin(), sizes.end(), displ.begin()); 
+            std::partial_sum(sizes.begin(), sizes.end(), displ.begin());
             std::transform(displ.begin(), displ.end(), sizes.begin(), displ.begin(), std::minus<int>()); // exclusive_scan is c++17
             // a vector that will store the received data, relevant only on process rank=0
             std::vector<real_t> out_values(this->distmem.grid_size[0]);
@@ -385,18 +385,18 @@ namespace libmpdataxx
           else
 #endif
             return advectee(e);
-        }  
+        }
 
-        blitz::Array<real_t, 1> advector(int d = 0)  
-        {   
+        blitz::Array<real_t, 1> advector(int d = 0)
+        {
           using namespace arakawa_c;
           assert(d == 0);
           // returning just the domain interior, i.e. without halos
           // reindexed to make it more intuitive when working with index placeholders
           // (i.e. border between cell 0 and cell 1 is indexed with 0)
           auto orgn = decltype(this->origin)({
-                 this->origin[0] - 1 
-               }); 
+                 this->origin[0] - 1
+               });
 
           return this->GC[d](
             this->distmem_ext(this->grid_size[0]^(-1)^h)
@@ -405,12 +405,12 @@ namespace libmpdataxx
               ? decltype(this->origin)({this->origin[0] - 1})
               : orgn
           );
-        }   
+        }
 
         blitz::Array<real_t, 1> g_factor()
         {
           // a sanity check
-          if (this->G.get() == nullptr) 
+          if (this->G.get() == nullptr)
             throw std::runtime_error("g_factor() called with nug option unset?");
 
           // the same logic as in advectee() - see above
@@ -418,17 +418,17 @@ namespace libmpdataxx
             this->grid_size[0]
           ).reindex(this->origin);
         }
-        
+
         blitz::Array<real_t, 1> vab_coefficient()
         {
           throw std::logic_error("absorber not yet implemented in 1d");
         }
-        
-        blitz::Array<real_t, 1> vab_relaxed_state(int d = 0)  
-        {   
+
+        blitz::Array<real_t, 1> vab_relaxed_state(int d = 0)
+        {
           throw std::logic_error("absorber not yet implemented in 1d");
-        }   
-        
+        }
+
         blitz::Array<real_t, 1> sclr_array(const std::string& name, int n = 0)
         {
           return this->tmp.at(this->avail_tmp[name].first)[this->avail_tmp[name].second][n](
@@ -456,7 +456,7 @@ namespace libmpdataxx
         }
 
         const blitz::Array<real_t, 2> advectee_global(int e = 0)
-        {   
+        {
 #if defined(USE_MPI)
           if(this->distmem.size() > 1)
           {
@@ -466,14 +466,14 @@ namespace libmpdataxx
             // a vector of number of elements to be sent by each non-root process
             std::vector<int> sizes(this->distmem.size());
             std::iota(sizes.begin(), sizes.end(), 0); // fill with 1,2,3,...
-            for(auto &size : sizes) 
-            { 
+            for(auto &size : sizes)
+            {
               size = this->slab(rng_t(0, this->distmem.grid_size[0]-1), size, this->distmem.size()).length()
                       * this->grid_size[1].length();
             }
             // calc displacement
             std::vector<int> displ(sizes.size());
-            std::partial_sum(sizes.begin(), sizes.end(), displ.begin()); 
+            std::partial_sum(sizes.begin(), sizes.end(), displ.begin());
             std::transform(displ.begin(), displ.end(), sizes.begin(), displ.begin(), std::minus<int>()); // exclusive_scan is c++17
             // a vector that will store the received data, relevant only on process rank=0
             std::vector<real_t> out_values(this->distmem.grid_size[0] * this->grid_size[1].length());
@@ -485,7 +485,7 @@ namespace libmpdataxx
             boost::mpi::gatherv(this->distmem.mpicom, in_values_vec, out_values.data(), sizes, displ, 0);
             // send the result to other processes
             boost::mpi::broadcast(this->distmem.mpicom, out_values, 0);
-         
+
             blitz::Array<real_t, 2> res(out_values.data(), blitz::shape(
               this->distmem.grid_size[0], this->grid_size[1].length()),
               blitz::duplicateData);
@@ -494,39 +494,39 @@ namespace libmpdataxx
           else
 #endif
             return advectee(e);
-        }  
+        }
 
-        blitz::Array<real_t, 2> advector(int d = 0)  
-        {   
+        blitz::Array<real_t, 2> advector(int d = 0)
+        {
           using namespace arakawa_c;
           assert(d == 0 || d== 1);
           // returning just the domain interior, i.e. without halos
           // reindexed to make it more intuitive when working with index placeholders
           auto orgn = decltype(this->origin)({
-                 this->origin[0] - 1, 
+                 this->origin[0] - 1,
                  this->origin[1]
-               }); 
+               });
 
           switch (d)
-          { 
-            case 0: 
+          {
+            case 0:
               return this->GC[d](
-                this->distmem_ext(this->grid_size[0]^(-1)^h), 
+                this->distmem_ext(this->grid_size[0]^(-1)^h),
                 this->grid_size[1]
-              ).reindex(orgn); 
-            case 1: 
+              ).reindex(orgn);
+            case 1:
               return this->GC[d](
-                this->distmem_ext(this->grid_size[0]), 
+                this->distmem_ext(this->grid_size[0]),
                 this->grid_size[1]^(-1)^h
               ).reindex(orgn);
             default: assert(false); throw;
           }
-        }   
+        }
 
         blitz::Array<real_t, 2> g_factor()
         {
           // a sanity check
-          if (this->G.get() == nullptr) 
+          if (this->G.get() == nullptr)
             throw std::runtime_error("g_factor() called with nug option unset?");
 
           // the same logic as in advectee() - see above
@@ -535,11 +535,11 @@ namespace libmpdataxx
             this->grid_size[1]
           ).reindex(this->origin);
         }
-        
+
         blitz::Array<real_t, 2> vab_coefficient()
         {
           // a sanity check
-          if (this->vab_coeff.get() == nullptr) 
+          if (this->vab_coeff.get() == nullptr)
             throw std::runtime_error("vab_coeff() called with option vip_vab unset?");
 
           // the same logic as in advectee() - see above
@@ -548,20 +548,20 @@ namespace libmpdataxx
             this->grid_size[1]
           ).reindex(this->origin);
         }
-        
-        blitz::Array<real_t, 2> vab_relaxed_state(int d = 0)  
-        {   
+
+        blitz::Array<real_t, 2> vab_relaxed_state(int d = 0)
+        {
           assert(d == 0 || d== 1);
           // a sanity check
-          if (this->vab_coeff.get() == nullptr) 
+          if (this->vab_coeff.get() == nullptr)
             throw std::runtime_error("vab_relaxed_state() called with option vip_vab unset?");
           // the same logic as in advectee() - see above
           return this->vab_relax[d](
             this->grid_size[0],
             this->grid_size[1]
           ).reindex(this->origin);
-        }   
-        
+        }
+
         blitz::Array<real_t, 2> sclr_array(const std::string& name, int n = 0)
         {
           return this->tmp.at(this->avail_tmp[name].first)[this->avail_tmp[name].second][n](
@@ -591,7 +591,7 @@ namespace libmpdataxx
         }
 
         const blitz::Array<real_t, 3> advectee_global(int e = 0)
-        {   
+        {
 #if defined(USE_MPI)
           if(this->distmem.size() > 1)
           {
@@ -601,14 +601,14 @@ namespace libmpdataxx
             // a vector of number of elements to be sent by each non-root process
             std::vector<int> sizes(this->distmem.size());
             std::iota(sizes.begin(), sizes.end(), 0); // fill with 1,2,3,...
-            for(auto &size : sizes) 
-            { 
+            for(auto &size : sizes)
+            {
               size = this->slab(rng_t(0, this->distmem.grid_size[0]-1), size, this->distmem.size()).length()
                       * this->grid_size[1].length() * this->grid_size[2].length();
             }
             // calc displacement
             std::vector<int> displ(sizes.size());
-            std::partial_sum(sizes.begin(), sizes.end(), displ.begin()); 
+            std::partial_sum(sizes.begin(), sizes.end(), displ.begin());
             std::transform(displ.begin(), displ.end(), sizes.begin(), displ.begin(), std::minus<int>()); // exclusive_scan is c++17
             // a vector that will store the received data, relevant only on process rank=0
             std::vector<real_t> out_values(this->distmem.grid_size[0] * this->grid_size[1].length() * this->grid_size[2].length());
@@ -620,7 +620,7 @@ namespace libmpdataxx
             boost::mpi::gatherv(this->distmem.mpicom, in_values_vec, out_values.data(), sizes, displ, 0);
             // send the result to other processes
             boost::mpi::broadcast(this->distmem.mpicom, out_values, 0);
-         
+
             blitz::Array<real_t, 3> res(out_values.data(), blitz::shape(
               this->distmem.grid_size[0], this->grid_size[1].length(), this->grid_size[2].length()),
               blitz::duplicateData);
@@ -629,48 +629,48 @@ namespace libmpdataxx
           else
 #endif
             return advectee(e);
-        }  
+        }
 
-        blitz::Array<real_t, 3> advector(int d = 0)  
-        {   
+        blitz::Array<real_t, 3> advector(int d = 0)
+        {
           using namespace arakawa_c;
           assert(d == 0 || d == 1 || d == 2);
           // returning just the domain interior, i.e. without halos
           // reindexed to make it more intuitive when working with index placeholders
           auto orgn = decltype(this->origin)({
-                 this->origin[0] - 1, 
+                 this->origin[0] - 1,
                  this->origin[1],
                  this->origin[2]
-               }); 
+               });
 
           switch (d)
-          { 
-            case 0: 
+          {
+            case 0:
               return this->GC[d](
                 this->distmem_ext(this->grid_size[0]^(-1)^h),
                 this->grid_size[1],
                 this->grid_size[2]
-              ).reindex(orgn);  
-            case 1: 
+              ).reindex(orgn);
+            case 1:
               return this->GC[d](
                 this->distmem_ext(this->grid_size[0]),
                 this->grid_size[1]^(-1)^h,
                 this->grid_size[2]
-              ).reindex(orgn);  
-            case 2: 
+              ).reindex(orgn);
+            case 2:
               return this->GC[d](
                 this->distmem_ext(this->grid_size[0]),
                 this->grid_size[1],
                 this->grid_size[2]^(-1)^h
-              ).reindex(orgn);  
+              ).reindex(orgn);
             default: assert(false); throw;
           }
-        }   
+        }
 
         blitz::Array<real_t, 3> g_factor()
         {
           // a sanity check
-          if (this->G.get() == nullptr) 
+          if (this->G.get() == nullptr)
             throw std::runtime_error("g_factor() called with nug option unset?");
 
           // the same logic as in advectee() - see above
@@ -684,7 +684,7 @@ namespace libmpdataxx
         blitz::Array<real_t, 3> vab_coefficient()
         {
           // a sanity check
-          if (this->vab_coeff.get() == nullptr) 
+          if (this->vab_coeff.get() == nullptr)
             throw std::runtime_error("vab_coeff() called with option vip_vab unset?");
 
           // the same logic as in advectee() - see above
@@ -694,12 +694,12 @@ namespace libmpdataxx
             this->grid_size[2]
           ).reindex(this->origin);
         }
-        
-        blitz::Array<real_t, 3> vab_relaxed_state(int d = 0)  
-        {   
+
+        blitz::Array<real_t, 3> vab_relaxed_state(int d = 0)
+        {
           assert(d == 0 || d == 1 || d == 2);
           // a sanity check
-          if (this->vab_coeff.get() == nullptr) 
+          if (this->vab_coeff.get() == nullptr)
             throw std::runtime_error("vab_relaxed_state() called with option vip_vab unset?");
           // the same logic as in advectee() - see above
           return this->vab_relax[d](
@@ -707,7 +707,7 @@ namespace libmpdataxx
             this->grid_size[1],
             this->grid_size[2]
           ).reindex(this->origin);
-        }   
+        }
 
         blitz::Array<real_t, 3> sclr_array(const std::string& name, int n = 0)
         {
diff --git a/libmpdata++/concurr/detail/timer.hpp b/libmpdata++/concurr/detail/timer.hpp
index 1f06e7d4..10d8d190 100644
--- a/libmpdata++/concurr/detail/timer.hpp
+++ b/libmpdata++/concurr/detail/timer.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -14,15 +14,15 @@ namespace libmpdataxx
 {
   namespace concurr
   {
-    namespace detail 
+    namespace detail
     {
-      class timer 
+      class timer
       {
-        std::unique_ptr<boost::timer::cpu_timer> tmr; 
+        std::unique_ptr<boost::timer::cpu_timer> tmr;
         bool started = false;
 
         public:
-      
+
         // ctor
         timer()
         {
@@ -30,9 +30,9 @@ namespace libmpdataxx
         }
 
         void resume()
-        { 
+        {
           if (started) tmr->resume();
-          else 
+          else
           {
             started = true;
             tmr->start();
diff --git a/libmpdata++/concurr/openmp.hpp b/libmpdata++/concurr/openmp.hpp
index 1ca60287..24f7c78b 100644
--- a/libmpdata++/concurr/openmp.hpp
+++ b/libmpdata++/concurr/openmp.hpp
@@ -30,7 +30,7 @@ namespace libmpdataxx
     class openmp : public detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>
     {
       using parent_t = detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>;
- 
+
 
       struct mem_t : parent_t::mem_t
       {
@@ -70,13 +70,13 @@ namespace libmpdataxx
           i = omp_get_thread_num();
 #endif
           this->algos[i].solve(nt);
-        } 
+        }
       }
 
       public:
 
       // ctor
-      openmp(const typename solver_t::rt_params_t &p) : 
+      openmp(const typename solver_t::rt_params_t &p) :
         parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[0]))
       {}
 
diff --git a/libmpdata++/concurr/serial.hpp b/libmpdata++/concurr/serial.hpp
index 2b815744..f6abc1ea 100644
--- a/libmpdata++/concurr/serial.hpp
+++ b/libmpdata++/concurr/serial.hpp
@@ -24,7 +24,7 @@ namespace libmpdataxx
     class serial : public detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>
     {
       using parent_t = detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>;
- 
+
 
       struct mem_t : parent_t::mem_t
       {
@@ -33,8 +33,8 @@ namespace libmpdataxx
         void barrier() { }
 
         // ctors
-        mem_t(const std::array<int, solver_t::n_dims> &grid_size) 
-          : parent_t::mem_t(grid_size, size()) 
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size)
+          : parent_t::mem_t(grid_size, size())
         {};
       };
 
@@ -46,7 +46,7 @@ namespace libmpdataxx
       public:
 
       // ctor
-      serial(const typename solver_t::rt_params_t &p) : 
+      serial(const typename solver_t::rt_params_t &p) :
         parent_t(p, new mem_t(p.grid_size), mem_t::size())
       {}
 
diff --git a/libmpdata++/concurr/threads.hpp b/libmpdata++/concurr/threads.hpp
index 164b8219..2da9488e 100644
--- a/libmpdata++/concurr/threads.hpp
+++ b/libmpdata++/concurr/threads.hpp
@@ -18,8 +18,8 @@ namespace libmpdataxx
 {
   namespace concurr
   {
-    /// @brief shared-memory concurency logic using threads 
-    ///        (\ref libmpdataxx::concurr::openmp if supported, 
+    /// @brief shared-memory concurrency logic using threads
+    ///        (\ref libmpdataxx::concurr::openmp if supported,
     ///        \ref libmpdataxx::concurr::boost_thread otherwise)
     template <
       class solver_t,
@@ -29,7 +29,7 @@ namespace libmpdataxx
       bcond::bcond_e bcyr = bcond::null,
       bcond::bcond_e bczl = bcond::null,
       bcond::bcond_e bczr = bcond::null
-    > using threads = 
+    > using threads =
 #if defined(_OPENMP)
     openmp<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>;
 #else
diff --git a/libmpdata++/formulae/arakawa_c.hpp b/libmpdata++/formulae/arakawa_c.hpp
index 6c3e3b43..631f0d63 100644
--- a/libmpdata++/formulae/arakawa_c.hpp
+++ b/libmpdata++/formulae/arakawa_c.hpp
@@ -14,41 +14,41 @@ namespace libmpdataxx
   {
     namespace
     {
-      struct hlf_t {} h; 
+      struct hlf_t {} h;
 
       inline rng_t operator+(
         const rng_t &i, const hlf_t &
-      ) { 
-        return i; 
-      } 
+      ) {
+        return i;
+      }
 
       inline rng_t operator-(
         const rng_t &i, const hlf_t &
-      ) { 
-        return i-1; 
+      ) {
+        return i-1;
       }
-      
+
       inline int operator+(
         const int i, const hlf_t &
-      ) { 
-        return i; 
-      } 
+      ) {
+        return i;
+      }
 
       inline int operator-(
         const int i, const hlf_t &
-      ) { 
-        return i-1; 
+      ) {
+        return i-1;
       }
 
       template<class n_t>
       inline rng_t operator^(
         const rng_t &r, const n_t &n
-      ) { 
+      ) {
         return rng_t(
-          (r - n).first(), 
+          (r - n).first(),
           (r + n).last()
-        ); 
-      } 
+        );
+      }
     };
   } // namespace arakawa_c
 } // namespace libmpdataxx
diff --git a/libmpdata++/formulae/common.hpp b/libmpdata++/formulae/common.hpp
index 3b9846b6..800839c2 100644
--- a/libmpdata++/formulae/common.hpp
+++ b/libmpdata++/formulae/common.hpp
@@ -25,7 +25,7 @@ namespace libmpdataxx
     {
       return std::abs(a);
     }
-    
+
     template<class ix_t, class arg_t>
     forceinline_macro auto abs(const arg_t &a, typename std::enable_if<std::is_same<ix_t, rng_t>::value>::type* = 0)
     {
@@ -38,7 +38,7 @@ namespace libmpdataxx
       using cm_t = typename std::common_type<a_t, b_t>::type;
       return std::min(static_cast<cm_t>(a), static_cast<cm_t>(b));
     }
-    
+
     template<class ix_t, class a_t, class b_t>
     forceinline_macro auto min(const a_t &a, const b_t &b, typename std::enable_if<std::is_same<ix_t, rng_t>::value>::type* = 0)
     {
@@ -51,34 +51,34 @@ namespace libmpdataxx
       using cm_t = typename std::common_type<a_t, b_t>::type;
       return std::max(static_cast<cm_t>(a), static_cast<cm_t>(b));
     }
-    
+
     template<class ix_t, class a_t, class b_t>
     forceinline_macro auto max(const a_t &a, const b_t &b, typename std::enable_if<std::is_same<ix_t, rng_t>::value>::type* = 0)
     {
       return blitz::max(a, b);
     }
-   
+
     // variadic max & min
     template<class ix_t, class a_t, class... b_ts>
     forceinline_macro auto max(const a_t &a, const b_ts & ... bs)
     {
       return max<ix_t>(a, max<ix_t>(bs...));
     }
-    
+
     template<class ix_t, class a_t, class... b_ts>
     forceinline_macro auto min(const a_t &a, const b_ts & ... bs)
     {
       return min<ix_t>(a, min<ix_t>(bs...));
     }
-      
+
     template<class ix_t, class arg_t>
-    forceinline_macro auto where(bool c, const arg_t &a, const arg_t &b, typename std::enable_if<std::is_same<ix_t, int>::value>::type* = 0)   
+    forceinline_macro auto where(bool c, const arg_t &a, const arg_t &b, typename std::enable_if<std::is_same<ix_t, int>::value>::type* = 0)
     {
       return c ? a : b;
     }
-    
+
     template<class ix_t, class c_t, class a_t, class b_t>
-    forceinline_macro auto where(c_t c, const a_t &a, const b_t &b, typename std::enable_if<std::is_same<ix_t, rng_t>::value>::type* = 0)  
+    forceinline_macro auto where(c_t c, const a_t &a, const b_t &b, typename std::enable_if<std::is_same<ix_t, rng_t>::value>::type* = 0)
     {
       return blitz::where(c, a, b);
     }
@@ -130,7 +130,7 @@ namespace libmpdataxx
       const ix_t &,
       typename std::enable_if<!opts::isset(opts, opts::nug)>::type* = 0 // enabled if nug == false
     ) {
-      return 1; 
+      return 1;
     }
 
     // 2D: G = const = 1
@@ -140,7 +140,7 @@ namespace libmpdataxx
       const ix_t &, const ix_t &,
       typename std::enable_if<!opts::isset(opts, opts::nug)>::type* = 0 // enabled if nug == false
     ) {
-      return 1; 
+      return 1;
     }
 
     // 3D: G = const = 1
@@ -150,7 +150,7 @@ namespace libmpdataxx
       const ix_t &, const ix_t &, const ix_t &,
       typename std::enable_if<!opts::isset(opts, opts::nug)>::type* = 0 // enabled if nug == false
     ) {
-      return 1; 
+      return 1;
     }
 
     // 1D on ND: G != const
@@ -159,15 +159,15 @@ namespace libmpdataxx
       const arr_t &G,
       const ix_t &i,
       typename std::enable_if<opts::isset(opts, opts::nug)>::type* = 0 // enabled if nug == true
-    ) 
+    )
     {
       return return_helper<ix_t>(
         G(i) + 0 // return_macro includes a call to blitz::safeToReturn() which expects an expression as an arg
       );
     }
-    
+
     // 2D: G != const
-    template<opts::opts_t opts, int d, class arr_t, class ix_t> 
+    template<opts::opts_t opts, int d, class arr_t, class ix_t>
     inline auto G(
       const arr_t &G,
       const ix_t &i,
@@ -179,9 +179,9 @@ namespace libmpdataxx
         G(idxperm::pi<d>(i, j)) + 0
       );
     }
-    
+
     // 3D: G != const
-    template<opts::opts_t opts, int d, class arr_t, class ix_t> 
+    template<opts::opts_t opts, int d, class arr_t, class ix_t>
     inline auto G(
       const arr_t &G,
       const ix_t &i,
diff --git a/libmpdata++/formulae/donorcell_formulae.hpp b/libmpdata++/formulae/donorcell_formulae.hpp
index 8cea888f..eda4cf9c 100644
--- a/libmpdata++/formulae/donorcell_formulae.hpp
+++ b/libmpdata++/formulae/donorcell_formulae.hpp
@@ -22,7 +22,7 @@ namespace libmpdataxx
 
       const int n_tlev = 2, halo = 1;
 
-      template<opts_t opts, class T1, class T2, class T3> 
+      template<opts_t opts, class T1, class T2, class T3>
       inline auto F(
         const T1 &psi_l, const T2 &psi_r, const T3 &GC
       )
@@ -34,46 +34,46 @@ namespace libmpdataxx
       }
 
       template <opts_t opts, class arr_1d_t>
-      inline auto make_flux( 
-        const arr_1d_t &psi, 
-        const arr_1d_t &GC, 
+      inline auto make_flux(
+        const arr_1d_t &psi,
+        const arr_1d_t &GC,
         const rng_t &i
       )
       {
         return return_helper<rng_t>(F<opts>(
-          psi(i  ), 
-          psi(i+1), 
+          psi(i  ),
+          psi(i+1),
           GC(i+h)
         ));
       }
 
-      template<opts_t opts, int d, class arr_2d_t>  
-      inline auto make_flux( 
-        const arr_2d_t &psi, 
-        const arr_2d_t &GC, 
-        const rng_t &i, 
+      template<opts_t opts, int d, class arr_2d_t>
+      inline auto make_flux(
+        const arr_2d_t &psi,
+        const arr_2d_t &GC,
+        const rng_t &i,
         const rng_t &j
-      ) 
+      )
       {
         return return_helper<rng_t>(F<opts>(
-          psi(pi<d>(i,   j)), 
-          psi(pi<d>(i+1, j)), 
+          psi(pi<d>(i,   j)),
+          psi(pi<d>(i+1, j)),
            GC(pi<d>(i+h, j))
         ));
       }
 
-      template<opts_t opts, int d, class arr_3d_t>  
-      inline auto make_flux( 
-        const arr_3d_t &psi, 
-        const arr_3d_t &GC, 
-        const rng_t &i, 
+      template<opts_t opts, int d, class arr_3d_t>
+      inline auto make_flux(
+        const arr_3d_t &psi,
+        const arr_3d_t &GC,
+        const rng_t &i,
         const rng_t &j,
         const rng_t &k
       )
       {
         return return_helper<rng_t>(F<opts>(
-          psi(pi<d>(i,   j, k)), 
-          psi(pi<d>(i+1, j, k)), 
+          psi(pi<d>(i,   j, k)),
+          psi(pi<d>(i+1, j, k)),
            GC(pi<d>(i+h, j, k))
         ));
       }
@@ -82,7 +82,7 @@ namespace libmpdataxx
       inline void donorcell_sum(
         const arrvec_t<a_t> &khn_tmp,
         const idx_t<1> i,
-        a_t psi_new, 
+        a_t psi_new,
         const a_t &psi_old,
         const f1_t &flx_1,
         const f2_t &flx_2,
@@ -121,7 +121,7 @@ namespace libmpdataxx
           psi_new = psi_old + ((-flx_1 + flx_2) + (-flx_3 + flx_4)) / g;
         }
         else
-        { 
+        {
           kahan_zro(khn_tmp[0](ij), khn_tmp[1](ij), khn_tmp[2](ij), psi_new);
           kahan_add(khn_tmp[0](ij), khn_tmp[1](ij), khn_tmp[2](ij), psi_new, psi_old);
           kahan_add(khn_tmp[0](ij), khn_tmp[1](ij), khn_tmp[2](ij), psi_new, -flx_1 / g);
@@ -150,7 +150,7 @@ namespace libmpdataxx
         {
           // note: the parentheses are intended to minimise chances of numerical errors
           psi_new = psi_old + ((-flx_1 + flx_2) + (-flx_3 + flx_4) + (-flx_5 + flx_6)) / g;
-        } 
+        }
         else
         {
           kahan_zro(khn_tmp[0](ijk), khn_tmp[1](ijk), khn_tmp[2](ijk), psi_new);
@@ -164,6 +164,6 @@ namespace libmpdataxx
         }
       }
 
-    } // namespace donorcell 
+    } // namespace donorcell
   } // namespace formulae
 } // namespace libmpdataxx
diff --git a/libmpdata++/formulae/idxperm.hpp b/libmpdata++/formulae/idxperm.hpp
index 7b77aaf4..5c5c1c74 100644
--- a/libmpdata++/formulae/idxperm.hpp
+++ b/libmpdata++/formulae/idxperm.hpp
@@ -13,14 +13,14 @@ namespace libmpdataxx
     template <int n_dims> using int_idx_t = blitz::TinyVector<int, n_dims>;
 
     // 2D - ranges or mix of ranges with ints
-    template<int d> 
+    template<int d>
     inline idx_t<2> pi(const rng_t &i, const rng_t &j);
 
     template<>
     inline idx_t<2> pi<0>(const rng_t &i, const rng_t &j) { return idx_t<2>({i,j}); }
 
     template<>
-    inline idx_t<2> pi<1>(const rng_t &j, const rng_t &i) { return idx_t<2>({i,j}); } 
+    inline idx_t<2> pi<1>(const rng_t &j, const rng_t &i) { return idx_t<2>({i,j}); }
 
     template<int d>
     inline idx_t<2> pi(const int   &i, const rng_t &j) { return pi<d>(rng_t(i,i), j); }
@@ -28,15 +28,15 @@ namespace libmpdataxx
     // 2D - ints
     template<int d>
     inline int_idx_t<2> pi(const int i, const int j);
-    
+
     template<>
     inline int_idx_t<2> pi<0>(const int i, const int j) { return int_idx_t<2>({i,j}); }
-    
+
     template<>
     inline int_idx_t<2> pi<1>(const int j, const int i) { return int_idx_t<2>({i,j}); }
 
     // 3D - ranges or mix of ranges with ints
-    template<int d> 
+    template<int d>
     inline idx_t<3> pi(const rng_t &i, const rng_t &j, const rng_t &k);
 
     template<>
@@ -47,7 +47,7 @@ namespace libmpdataxx
 
     template<>
     inline idx_t<3> pi<2>(const rng_t &k, const rng_t &i, const rng_t &j) { return idx_t<3>({i,j,k}); }
-    
+
     template<int d>
     inline idx_t<3> pi(const int &i, const rng_t &j, const rng_t &k) { return pi<d>(rng_t(i,i), j, k); }
 
@@ -56,17 +56,17 @@ namespace libmpdataxx
 
     template<int d>
     inline idx_t<3> pi(const int &i, const int   &j, const rng_t &k) { return pi<d>(rng_t(i,i), rng_t(j,j), k); }
-    
+
     // 3D - ints
-    template<int d> 
+    template<int d>
     inline int_idx_t<3> pi(const int i, const int j, const int k);
-    
+
     template<>
     inline int_idx_t<3> pi<0>(const int i, const int j, const int k) { return int_idx_t<3>({i,j,k}); }
-    
+
     template<>
     inline int_idx_t<3> pi<1>(const int j, const int k, const int i) { return int_idx_t<3>({i,j,k}); }
-    
+
     template<>
     inline int_idx_t<3> pi<2>(const int k, const int i, const int j) { return int_idx_t<3>({i,j,k}); }
   } // namespace idxperm
diff --git a/libmpdata++/formulae/kahan_sum.hpp b/libmpdata++/formulae/kahan_sum.hpp
index ade01a01..929820f4 100644
--- a/libmpdata++/formulae/kahan_sum.hpp
+++ b/libmpdata++/formulae/kahan_sum.hpp
@@ -11,14 +11,14 @@ namespace libmpdataxx
   namespace formulae
   {
     template <class a_t>
-    inline void kahan_zro(a_t c, const a_t&, const a_t&, a_t sum) 
+    inline void kahan_zro(a_t c, const a_t&, const a_t&, a_t sum)
     {
       sum = 0;
       c = 0;
     }
 
     template <class a_t, class f_t>
-    inline void kahan_add(a_t c, a_t y, a_t t, a_t sum, f_t input) 
+    inline void kahan_add(a_t c, a_t y, a_t t, a_t sum, f_t input)
     {
       y = input - c;
       t = sum + y;
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp
index c7ef5073..1ea33166 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp
@@ -12,59 +12,59 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_1d.hpp>
 
 namespace libmpdataxx
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // first come helpers for divergence form of antidiffusive velocity
       template <opts_t opts, class arr_1d_t, class ix_t>
       forceinline_macro auto div_2nd(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
-        const arr_1d_t &G, 
+        const arr_1d_t &G,
         const ix_t &i
       )
       {
         return return_helper<ix_t>(
           abs(GC[0](i+h)) / 2
-          * ndx_psi<opts>(psi, i) 
-          - 
+          * ndx_psi<opts>(psi, i)
+          -
           GC[0](i+h) / 2
           * nfdiv<opts>(psi, GC, G, i)
         );
       }
-      
+
       template <opts_t opts, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_upwind(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
-        const arr_1d_t &G, 
-        const ix_t &i, 
+        const arr_1d_t &G,
+        const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
           abs(div_2nd<opts>(psi, GC, G, i)) / 2
-          * ndx_psi<opts>(psi, i) 
+          * ndx_psi<opts>(psi, i)
         );
       }
 
       template <opts_t opts, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_upwind(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
-        const arr_1d_t &G, 
-        const ix_t &i, 
+        const arr_1d_t &G,
+        const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return 0;
       }
-      
+
       template <opts_t opts, solvers::tmprl_extrp_t tmprl_extrp, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_temporal(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &ndtt_GC,
         const ix_t &i,
         typename std::enable_if<tmprl_extrp == solvers::noextrp>::type* = 0
@@ -72,10 +72,10 @@ namespace libmpdataxx
       {
         return ndtt_GC0<opts>(psi, ndtt_GC[0], i);
       }
-      
+
       template <opts_t opts, solvers::tmprl_extrp_t tmprl_extrp, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_temporal(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &ndtt_GC,
         const ix_t &i,
         typename std::enable_if<tmprl_extrp == solvers::linear2>::type* = 0
@@ -83,10 +83,10 @@ namespace libmpdataxx
       {
         return 10 * ndtt_GC0<opts>(psi, ndtt_GC[0], i);
       }
-      
+
       template <opts_t opts, solvers::sptl_intrp_t sptl_intrp, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
         const ix_t &i,
         typename std::enable_if<sptl_intrp == solvers::exact>::type* = 0
@@ -96,10 +96,10 @@ namespace libmpdataxx
           ndxx_GC0<opts>(psi, GC[0], i)
         );
       }
-      
+
       template <opts_t opts, solvers::sptl_intrp_t sptl_intrp, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
         const ix_t &i,
         typename std::enable_if<sptl_intrp == solvers::aver2>::type* = 0
@@ -109,10 +109,10 @@ namespace libmpdataxx
           4 * ndxx_GC0<opts>(psi, GC[0], i)
         );
       }
-      
+
       template <opts_t opts, solvers::sptl_intrp_t sptl_intrp, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
         const ix_t &i,
         typename std::enable_if<sptl_intrp == solvers::aver4>::type* = 0
@@ -120,12 +120,12 @@ namespace libmpdataxx
       {
         return 0;
       }
-      
+
       template <opts_t opts, solvers::sptl_intrp_t sptl_intrp, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
-        const arr_1d_t &G, 
+        const arr_1d_t &G,
         const ix_t &i
       )
       {
@@ -138,31 +138,31 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       template <opts_t opts, solvers::sptl_intrp_t, solvers::tmprl_extrp_t, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
         const arrvec_t<arr_1d_t> &ndt_GC,
         const arrvec_t<arr_1d_t> &ndtt_GC,
-        const arr_1d_t &G, 
-        const ix_t &i, 
+        const arr_1d_t &G,
+        const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::div_3rd)>::type* = 0
       )
       {
         return 0;
       }
-      
+
       template <opts_t opts, solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp, class arr_1d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
         const arrvec_t<arr_1d_t> &ndt_GC,
         const arrvec_t<arr_1d_t> &ndtt_GC,
-        const arr_1d_t &G, 
+        const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::div_3rd)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
           // upwind differencing correction
@@ -181,12 +181,12 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // antidiffusive velocity - standard version
       template <opts_t opts, solvers::sptl_intrp_t, solvers::tmprl_extrp_t, class arr_1d_t>
       inline void antidiff( // antidiffusive velocity
-        arr_1d_t &res, 
-        const arr_1d_t &psi, 
+        arr_1d_t &res,
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
         const arrvec_t<arr_1d_t> &ndt_GC, // to have consistent interface with the div_3rd version
         const arrvec_t<arr_1d_t> &ndtt_GC, // ditto
@@ -197,11 +197,11 @@ namespace libmpdataxx
       {
         for (int i = ir.first(); i <= ir.last(); ++i)
         {
-          res(i) = 
+          res(i) =
           // second-order terms
           abs(GC[0](i+h)) / 2
           * (1 - abs(GC[0](i+h)) / G_bar_x<opts>(G, i))
-          * ndx_psi<opts>(psi, i) 
+          * ndx_psi<opts>(psi, i)
           // third-order terms
           + TOT<opts>(psi, GC[0], G, i) //higher order term
           // divergent flow terms
@@ -213,12 +213,12 @@ namespace libmpdataxx
       template <opts_t opts, solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp, class arr_1d_t>
       inline void antidiff(
         arr_1d_t &res,
-        const arr_1d_t &psi, 
+        const arr_1d_t &psi,
         const arrvec_t<arr_1d_t> &GC,
         const arrvec_t<arr_1d_t> &ndt_GC,
         const arrvec_t<arr_1d_t> &ndtt_GC,
-        const arr_1d_t &G, 
-        const rng_t &ir, 
+        const arr_1d_t &G,
+        const rng_t &ir,
         typename std::enable_if<opts::isset(opts, opts::div_2nd)>::type* = 0
       )
       {
@@ -227,11 +227,11 @@ namespace libmpdataxx
 
         for (int i = ir.first(); i <= ir.last(); ++i)
         {
-          res(i + h) = 
+          res(i + h) =
           div_2nd<opts>(psi, GC, G, i) +
           div_3rd<opts, sptl_intrp, tmprl_extrp>(psi, GC, ndt_GC, ndtt_GC, G, i);
         }
-      } 
+      }
 
     } // namespace mpdata
   } // namespace formulae
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp
index 1b7ce8b0..3c24215d 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp
@@ -12,66 +12,66 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_2d.hpp>
 #include <boost/preprocessor/punctuation/comma.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // first come helpers for divergence form of antidiffusive velocity
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto div_2nd(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &GC,
-        const arr_2d_t &G, 
-        const ix_t &i, 
+        const arr_2d_t &G,
+        const ix_t &i,
         const ix_t &j
       )
       {
         return return_helper<ix_t>(
           // second order terms
           abs(GC[dim](pi<dim>(i+h, j))) / 2
-          * ndx_psi<opts, dim>(psi, i, j) 
-          - 
+          * ndx_psi<opts, dim>(psi, i, j)
+          -
           GC[dim](pi<dim>(i+h, j)) / 2
           * nfdiv<opts, dim>(psi, GC, G, i, j)
         );
       }
-      
+
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_upwind(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &GC,
-        const arr_2d_t &G, 
-        const ix_t &i, 
+        const arr_2d_t &G,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
           abs(div_2nd<opts, dim>(psi, GC, G, i, j)) / 2
-          * ndx_psi<opts, dim>(psi, i, j) 
+          * ndx_psi<opts, dim>(psi, i, j)
         );
       }
 
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_upwind(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &GC,
-        const arr_2d_t &G, 
-        const ix_t &i, 
+        const arr_2d_t &G,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return 0;
       }
-      
+
       template <opts_t opts, int dim, solvers::tmprl_extrp_t tmprl_extrp, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_temporal(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &ndtt_GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<tmprl_extrp == solvers::noextrp>::type* = 0
       )
@@ -80,12 +80,12 @@ namespace libmpdataxx
           ndtt_GC0<opts, dim>(psi, ndtt_GC[dim], i, j)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::tmprl_extrp_t tmprl_extrp, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_temporal(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &ndtt_GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<tmprl_extrp == solvers::linear2>::type* = 0
       )
@@ -94,12 +94,12 @@ namespace libmpdataxx
           10 * ndtt_GC0<opts, dim>(psi, ndtt_GC[dim], i, j)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<sptl_intrp == solvers::exact>::type* = 0
       )
@@ -108,12 +108,12 @@ namespace libmpdataxx
           ndxx_GC0<opts, dim>(psi, GC[dim], i, j)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<sptl_intrp == solvers::aver2>::type* = 0
       )
@@ -122,25 +122,25 @@ namespace libmpdataxx
           4 * ndxx_GC0<opts, dim>(psi, GC[dim], i, j)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<sptl_intrp == solvers::aver4>::type* = 0
       )
       {
         return 0;
       }
-      
+
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arrvec_t<arr_2d_t> &GC,
-        const arr_2d_t &G, 
-        const ix_t &i, 
+        const arr_2d_t &G,
+        const ix_t &i,
         const ix_t &j
       )
       {
@@ -153,36 +153,36 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       template <opts_t opts, int dim,
                 solvers::sptl_intrp_t, solvers::tmprl_extrp_t,
                 class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
         const arrvec_t<arr_2d_t> &GC,
         const arrvec_t<arr_2d_t> &ndt_GC,
         const arrvec_t<arr_2d_t> &ndtt_GC,
-        const arr_2d_t &G, 
-        const ix_t &i, 
+        const arr_2d_t &G,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::div_3rd) && !opts::isset(opts, opts::div_3rd_dt)>::type* = 0
       )
       {
         return 0;
       }
-      
+
       template <opts_t opts, int dim,
                 solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp,
                 class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
         const arrvec_t<arr_2d_t> &GC,
         const arrvec_t<arr_2d_t> &ndt_GC,
         const arrvec_t<arr_2d_t> &ndtt_GC,
-        const arr_2d_t &G, 
-        const ix_t &i, 
+        const arr_2d_t &G,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::div_3rd)>::type* = 0
       )
@@ -204,18 +204,18 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       template <opts_t opts, int dim,
                 solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp,
                 class arr_2d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
         const arrvec_t<arr_2d_t> &GC,
         const arrvec_t<arr_2d_t> &ndt_GC,
         const arrvec_t<arr_2d_t> &ndtt_GC,
-        const arr_2d_t &G, 
-        const ix_t &i, 
+        const arr_2d_t &G,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::div_3rd_dt)>::type* = 0
       )
@@ -237,33 +237,33 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // antidiffusive velocity - standard version
       template <opts_t opts, int dim, solvers::sptl_intrp_t, solvers::tmprl_extrp_t, class arr_2d_t>
       inline void antidiff(
-        arr_2d_t &res, 
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
+        arr_2d_t &res,
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
         const arrvec_t<arr_2d_t> &GC,
         const arrvec_t<arr_2d_t> &ndt_GC, // to have consistent interface with the div_3rd version
         const arrvec_t<arr_2d_t> &ndtt_GC, // ditto
-        const arr_2d_t &G, 
-        const rng_t &ir, 
+        const arr_2d_t &G,
+        const rng_t &ir,
         const rng_t &jr,
         typename std::enable_if<!opts::isset(opts, opts::div_2nd) && !opts::isset(opts, opts::div_3rd)>::type* = 0
-      ) 
+      )
       {
         for (int i = ir.first(); i <= ir.last(); ++i)
         {
           for (int j = jr.first(); j <= jr.last(); ++j)
           {
-            res(pi<dim>(i, j)) = 
+            res(pi<dim>(i, j)) =
             // second order terms
             abs(GC[dim](pi<dim>(i+h, j))) / 2
             * (1 - abs(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j))
-            * ndx_psi<opts, dim>(psi_np1, i, j) 
-            - 
-            GC[dim](pi<dim>(i+h, j)) 
+            * ndx_psi<opts, dim>(psi_np1, i, j)
+            -
+            GC[dim](pi<dim>(i+h, j))
             * GC1_bar_xy<dim>(GC[dim+1], i, j)
             / (2 * G_bar_x<opts, dim>(G, i, j))
             * ndy_psi<opts, dim>(psi_np1, i, j)
@@ -280,14 +280,14 @@ namespace libmpdataxx
       // antidiffusive velocity - divergence form
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp, class arr_2d_t>
       inline void antidiff(
-        arr_2d_t &res, 
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
+        arr_2d_t &res,
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
         const arrvec_t<arr_2d_t> &GC,
         const arrvec_t<arr_2d_t> &ndt_GC,
         const arrvec_t<arr_2d_t> &ndtt_GC,
-        const arr_2d_t &G, 
-        const rng_t &ir, 
+        const arr_2d_t &G,
+        const rng_t &ir,
         const rng_t &jr,
         typename std::enable_if<opts::isset(opts, opts::div_2nd)>::type* = 0
       )
@@ -298,14 +298,14 @@ namespace libmpdataxx
         {
           for (int j = jr.first(); j <= jr.last(); ++j)
           {
-            res(pi<dim>(i + h, j)) = 
+            res(pi<dim>(i + h, j)) =
             div_2nd<opts, dim>(psi_np1, GC, G, i, j) +
             div_3rd<opts, dim, sptl_intrp, tmprl_extrp>(psi_np1, psi_n, GC, ndt_GC, ndtt_GC, G, i, j)
             // fourth order terms
             + FOT<opts, dim>(psi_np1, GC, G, i, j);
           }
         }
-      } 
+      }
     } // namespace mpdata
   } // namespace formulae
-} // namespcae libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp
index 2581b85b..36be071c 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp
@@ -21,10 +21,10 @@ namespace libmpdataxx
       // first come helpers for divergence form of antidiffusive velocity
       template <opts_t opts, int dim, class arr_3d_t, class ix_t>
       forceinline_macro auto div_2nd(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &GC,
-        const arr_3d_t &G, 
-        const ix_t &i, 
+        const arr_3d_t &G,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
@@ -32,19 +32,19 @@ namespace libmpdataxx
         return return_helper<ix_t>(
           // second order terms
           abs(GC[dim](pi<dim>(i+h, j, k))) / 2
-          * ndx_psi<opts, dim>(psi, i, j, k) 
-          - 
+          * ndx_psi<opts, dim>(psi, i, j, k)
+          -
           GC[dim](pi<dim>(i+h, j, k)) / 2
           * nfdiv<opts, dim>(psi, GC, G, i, j, k)
         );
       }
-      
+
       template <opts_t opts, int dim, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_upwind(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &GC,
-        const arr_3d_t &G, 
-        const ix_t &i, 
+        const arr_3d_t &G,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga)>::type* = 0
@@ -52,16 +52,16 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           abs(div_2nd<opts, dim>(psi, GC, G, i, j, k)) / 2
-          * ndx_psi<opts, dim>(psi, i, j, k) 
+          * ndx_psi<opts, dim>(psi, i, j, k)
         );
       }
 
       template <opts_t opts, int dim, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_upwind(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &GC,
-        const arr_3d_t &G, 
-        const ix_t &i, 
+        const arr_3d_t &G,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -69,12 +69,12 @@ namespace libmpdataxx
       {
         return 0;
       }
-      
+
       template <opts_t opts, int dim, solvers::tmprl_extrp_t tmprl_extrp, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_temporal(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &ndtt_GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<tmprl_extrp == solvers::noextrp>::type* = 0
@@ -84,12 +84,12 @@ namespace libmpdataxx
           ndtt_GC0<opts, dim>(psi, ndtt_GC[dim], i, j, k)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::tmprl_extrp_t tmprl_extrp, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_temporal(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &ndtt_GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<tmprl_extrp == solvers::linear2>::type* = 0
@@ -99,12 +99,12 @@ namespace libmpdataxx
           10 * ndtt_GC0<opts, dim>(psi, ndtt_GC[dim], i, j, k)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<sptl_intrp == solvers::exact>::type* = 0
@@ -114,12 +114,12 @@ namespace libmpdataxx
           ndxx_GC0<opts, dim>(psi, GC[dim], i, j, k)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<sptl_intrp == solvers::aver2>::type* = 0
@@ -129,12 +129,12 @@ namespace libmpdataxx
           4 * ndxx_GC0<opts, dim>(psi, GC[dim], i, j, k)
         );
       }
-      
+
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial_helper(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &GC,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<sptl_intrp == solvers::aver4>::type* = 0
@@ -145,10 +145,10 @@ namespace libmpdataxx
 
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd_spatial(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arrvec_t<arr_3d_t> &GC,
-        const arr_3d_t &G, 
-        const ix_t &i, 
+        const arr_3d_t &G,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
@@ -162,18 +162,18 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       template <opts_t opts, int dim,
                 solvers::sptl_intrp_t, solvers::tmprl_extrp_t,
                 class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
         const arrvec_t<arr_3d_t> &GC,
         const arrvec_t<arr_3d_t> &ndt_GC,
         const arrvec_t<arr_3d_t> &ndtt_GC,
-        const arr_3d_t &G, 
-        const ix_t &i, 
+        const arr_3d_t &G,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::div_3rd) && !opts::isset(opts, opts::div_3rd_dt)>::type* = 0
@@ -181,18 +181,18 @@ namespace libmpdataxx
       {
         return 0;
       }
-      
+
       template <opts_t opts, int dim,
                 solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp,
                 class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
         const arrvec_t<arr_3d_t> &GC,
         const arrvec_t<arr_3d_t> &ndt_GC,
         const arrvec_t<arr_3d_t> &ndtt_GC,
-        const arr_3d_t &G, 
-        const ix_t &i, 
+        const arr_3d_t &G,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::div_3rd)>::type* = 0
@@ -215,18 +215,18 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       template <opts_t opts, int dim,
                 solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp,
                 class arr_3d_t, class ix_t>
       forceinline_macro auto div_3rd(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
         const arrvec_t<arr_3d_t> &GC,
         const arrvec_t<arr_3d_t> &ndt_GC,
         const arrvec_t<arr_3d_t> &ndtt_GC,
-        const arr_3d_t &G, 
-        const ix_t &i, 
+        const arr_3d_t &G,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::div_3rd_dt)>::type* = 0
@@ -272,7 +272,7 @@ namespace libmpdataxx
           {
             for (int k = kr.first(); k <= kr.last(); ++k)
             {
-              res(pi<dim>(i, j, k)) = 
+              res(pi<dim>(i, j, k)) =
                 // second order terms
                 abs(GC[dim](pi<dim>(i+h, j, k))) / 2
               * (1 - abs(GC[dim](pi<dim>(i+h, j, k))) / G_bar_x<opts, dim>(G, i, j, k))
@@ -297,14 +297,14 @@ namespace libmpdataxx
       // antidiffusive velocity - divergence form
       template <opts_t opts, int dim, solvers::sptl_intrp_t sptl_intrp, solvers::tmprl_extrp_t tmprl_extrp, class arr_3d_t>
       inline void antidiff(
-        arr_3d_t &res, 
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
+        arr_3d_t &res,
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
         const arrvec_t<arr_3d_t> &GC,
         const arrvec_t<arr_3d_t> &ndt_GC,
         const arrvec_t<arr_3d_t> &ndtt_GC,
-        const arr_3d_t &G, 
-        const rng_t &ir, 
+        const arr_3d_t &G,
+        const rng_t &ir,
         const rng_t &jr,
         const rng_t &kr,
         typename std::enable_if<opts::isset(opts, opts::div_2nd)>::type* = 0
@@ -319,13 +319,13 @@ namespace libmpdataxx
           {
             for (int k = kr.first(); k <= kr.last(); ++k)
             {
-              res(pi<dim>(i + h, j, k)) = 
+              res(pi<dim>(i + h, j, k)) =
               div_2nd<opts, dim>(psi_np1, GC, G, i, j, k) +
               div_3rd<opts, dim, sptl_intrp, tmprl_extrp>(psi_np1, psi_n, GC, ndt_GC, ndtt_GC, G, i, j, k);
             }
           }
         }
-      } 
+      }
     } // namespace mpdata
   } // namespace formulae
 } // namespcae libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp
index 09d14544..b5f15ed1 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp
@@ -14,54 +14,54 @@
 //#include <boost/preprocessor/control/if.hpp>
 
 namespace libmpdataxx
-{ 
+{
   // in namespace solvers to be consistent with other options
   namespace solvers
   {
     enum sptl_intrp_t
-    { 
+    {
       exact,
       aver2,
       aver4,
     };
-    
+
     enum tmprl_extrp_t
-    { 
+    {
       noextrp,
       linear2,
     };
   }
 
-  namespace formulae 
-  { 
-    namespace mpdata 
+  namespace formulae
+  {
+    namespace mpdata
     {
       using namespace arakawa_c;
       using idxperm::pi;
       using opts::opts_t;
       using std::abs;
-      
+
       using blitz::pow2;
       using blitz::pow3;
       using blitz::pow4;
 
       const int n_tlev = 2;
 
-      constexpr const int halo(const opts_t &opts) 
+      constexpr const int halo(const opts_t &opts)
       {
         return (
           opts::isset(opts, opts::tot)        || // see psi 2-nd derivatives in eq. (36) in PKS & LGM 1998
           opts::isset(opts, opts::dfl)        || // see +3/2 in eq. (30) in PKS & LGM 1998
-          opts::isset(opts, opts::div_2nd)    || 
+          opts::isset(opts, opts::div_2nd)    ||
           opts::isset(opts, opts::div_3rd)    ||
           opts::isset(opts, opts::div_3rd_dt)
-        ) ? 2 : 1; 
+        ) ? 2 : 1;
       }
 
       // frac: implemented using blitz::where()
       template<opts_t opts, class ix_t, class nom_t, class den_t>
       forceinline_macro auto frac(
-        const nom_t &nom, 
+        const nom_t &nom,
         const den_t &den,
         typename std::enable_if<opts::isset(opts, opts::pfc)>::type* = 0 // enabled if pfc == true
       )
@@ -75,7 +75,7 @@ namespace libmpdataxx
       //       if den == 0, then adding a smallest representable positive number
       template<opts_t opts, class ix_t, class nom_t, class den_t>
       forceinline_macro auto frac(
-        const nom_t &nom, 
+        const nom_t &nom,
         const den_t &den,
         typename std::enable_if<!opts::isset(opts, opts::pfc)>::type* = 0 // enabled if pfc == false
       )
@@ -88,7 +88,7 @@ namespace libmpdataxx
       // a bigger-epsilon version for FCT (used regardless of opts::eps setting)
       template<class ix_t, class nom_t, class den_t>
       forceinline_macro auto fct_frac(
-        const nom_t &nom, 
+        const nom_t &nom,
         const den_t &den
       )
       {
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp
index e262ac98..b7747b01 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp
@@ -9,10 +9,10 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
 namespace libmpdataxx
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
 
       //divergent flow correction see eq. (30) from @copybrief Smolarkiewicz_and_Margolin_1998)
@@ -22,10 +22,10 @@ namespace libmpdataxx
         const arr_1d_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
-        typename std::enable_if<!opts::isset(opts, opts::dfl)>::type* = 0 
+        typename std::enable_if<!opts::isset(opts, opts::dfl)>::type* = 0
       )
-      { 
-        return 0;  
+      {
+        return 0;
       }
 
       template<opts_t opts, class arr_1d_t, class ix_t>
@@ -34,14 +34,14 @@ namespace libmpdataxx
         const arr_1d_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
-        typename std::enable_if<opts::isset(opts, opts::dfl) && !opts::isset(opts, opts::iga)>::type* = 0 
+        typename std::enable_if<opts::isset(opts, opts::dfl) && !opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          - fconst<arr_1d_t>(0.5) * GC(i+h) 
-          / 
-          (formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i)) 
-          * 
+          - fconst<arr_1d_t>(0.5) * GC(i+h)
+          /
+          (formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))
+          *
           (GC((i+1)+h) - GC(i-h))
         );
       }
@@ -52,14 +52,14 @@ namespace libmpdataxx
         const arr_1d_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
-        typename std::enable_if<opts::isset(opts, opts::dfl) && opts::isset(opts, opts::iga)>::type* = 0 
+        typename std::enable_if<opts::isset(opts, opts::dfl) && opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          - fconst<arr_1d_t>(0.5) * GC(i+h) 
-          / 
-          (formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i)) 
-          * 
+          - fconst<arr_1d_t>(0.5) * GC(i+h)
+          /
+          (formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))
+          *
           (GC((i+1)+h) - GC(i-h))
           *
           fconst<arr_1d_t>(0.5) *  (psi(i+1) + psi(i)) //to be compatible with iga formulation
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp
index 68a3347c..10ec4188 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp
@@ -10,11 +10,11 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_psi_2d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // divergent flow correction - no correction
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
@@ -24,38 +24,38 @@ namespace libmpdataxx
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
-        typename std::enable_if<!opts::isset(opts, opts::dfl)>::type* = 0 
+        typename std::enable_if<!opts::isset(opts, opts::dfl)>::type* = 0
       )
-      { 
-        return 0;  
+      {
+        return 0;
       }
 
       // divergent flow correction - general case
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto DFL(
         const arr_2d_t &psi,    //to have the same arguments as in iga option
-        const arrvec_t<arr_2d_t> &GC,      
+        const arrvec_t<arr_2d_t> &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
-        typename std::enable_if<opts::isset(opts, opts::dfl) && !opts::isset(opts, opts::iga)>::type* = 0 
+        typename std::enable_if<opts::isset(opts, opts::dfl) && !opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j)) 
+          - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))
           /
-          G_bar_x<opts, dim>(G, i, j) 
-          * 
+          G_bar_x<opts, dim>(G, i, j)
+          *
           (
             (
-              GC[dim](pi<dim>((i+1)+h, j)) - 
+              GC[dim](pi<dim>((i+1)+h, j)) -
               GC[dim](pi<dim>(i-h    , j))
             )
             +
             (
-              GC[dim-1](pi<dim>(i+1, j+h)) + 
+              GC[dim-1](pi<dim>(i+1, j+h)) +
               GC[dim-1](pi<dim>(i,   j+h)) -
-              GC[dim-1](pi<dim>(i+1, j-h)) - 
+              GC[dim-1](pi<dim>(i+1, j-h)) -
               GC[dim-1](pi<dim>(i,   j-h))
             )
           )
@@ -70,30 +70,30 @@ namespace libmpdataxx
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
-        typename std::enable_if<opts::isset(opts, opts::dfl) && opts::isset(opts, opts::iga)>::type* = 0 
+        typename std::enable_if<opts::isset(opts, opts::dfl) && opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j)) 
+          - fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))
           /
-          G_bar_x<opts, dim>(G, i, j) 
-          * 
+          G_bar_x<opts, dim>(G, i, j)
+          *
           (
             (
-              GC[dim](pi<dim>((i+1)+h, j)) - 
+              GC[dim](pi<dim>((i+1)+h, j)) -
               GC[dim](pi<dim>(i-h    , j))
             )
             +
             (
-              GC[dim-1](pi<dim>(i+1, j+h)) + 
+              GC[dim-1](pi<dim>(i+1, j+h)) +
               GC[dim-1](pi<dim>(i,   j+h)) -
-              GC[dim-1](pi<dim>(i+1, j-h)) - 
+              GC[dim-1](pi<dim>(i+1, j-h)) -
               GC[dim-1](pi<dim>(i,   j-h))
             )
           )
-          * psi_bar_x<opts, dim>(psi, i, j) 
+          * psi_bar_x<opts, dim>(psi, i, j)
         );
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp
index cba75024..b10bcacd 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp
@@ -10,11 +10,11 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_3d.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_psi_3d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // divergent flow correction - no correction
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
@@ -25,46 +25,46 @@ namespace libmpdataxx
         const ix_t &i,
         const ix_t &j,
         const ix_t &k,
-        typename std::enable_if<!opts::isset(opts, opts::dfl)>::type* = 0 
+        typename std::enable_if<!opts::isset(opts, opts::dfl)>::type* = 0
       )
-      { 
-        return 0;  
+      {
+        return 0;
       }
 
       // divergent flow correction - general case
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto DFL(
         const arr_3d_t &psi,    //to have the same arguments as in iga option
-        const arrvec_t<arr_3d_t> &GC,      
+        const arrvec_t<arr_3d_t> &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
         const ix_t &k,
-        typename std::enable_if<opts::isset(opts, opts::dfl) && !opts::isset(opts, opts::iga)>::type* = 0 
+        typename std::enable_if<opts::isset(opts, opts::dfl) && !opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k)) 
+          - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))
           /
           G_bar_x<opts, dim>(G, i, j, k)
-          * 
+          *
           (
             (
-              GC[dim](pi<dim>((i+1)+h, j, k)) - 
+              GC[dim](pi<dim>((i+1)+h, j, k)) -
               GC[dim](pi<dim>(i-h    , j, k))
             )
             +
             (
-              GC[dim+1](pi<dim>(i+1, j+h, k)) + 
+              GC[dim+1](pi<dim>(i+1, j+h, k)) +
               GC[dim+1](pi<dim>(i,   j+h, k)) -
-              GC[dim+1](pi<dim>(i+1, j-h, k)) - 
+              GC[dim+1](pi<dim>(i+1, j-h, k)) -
               GC[dim+1](pi<dim>(i,   j-h, k))
             )
             +
             (
-              GC[dim-1](pi<dim>(i+1, j, k+h)) + 
+              GC[dim-1](pi<dim>(i+1, j, k+h)) +
               GC[dim-1](pi<dim>(i,   j, k+h)) -
-              GC[dim-1](pi<dim>(i+1, j, k-h)) - 
+              GC[dim-1](pi<dim>(i+1, j, k-h)) -
               GC[dim-1](pi<dim>(i,   j, k-h))
             )
           )
@@ -80,31 +80,31 @@ namespace libmpdataxx
         const ix_t &i,
         const ix_t &j,
         const ix_t &k,
-        typename std::enable_if<opts::isset(opts, opts::dfl) && opts::isset(opts, opts::iga)>::type* = 0 
+        typename std::enable_if<opts::isset(opts, opts::dfl) && opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k)) 
+          - fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))
           /
           G_bar_x<opts, dim>(G, i, j, k)
-          * 
+          *
           (
             (
-              GC[dim](pi<dim>((i+1)+h, j, k)) - 
+              GC[dim](pi<dim>((i+1)+h, j, k)) -
               GC[dim](pi<dim>(i-h    , j, k))
             )
             +
             (
-              GC[dim+1](pi<dim>(i+1, j+h, k)) + 
+              GC[dim+1](pi<dim>(i+1, j+h, k)) +
               GC[dim+1](pi<dim>(i,   j+h, k)) -
-              GC[dim+1](pi<dim>(i+1, j-h, k)) - 
+              GC[dim+1](pi<dim>(i+1, j-h, k)) -
               GC[dim+1](pi<dim>(i,   j-h, k))
             )
             +
             (
-              GC[dim-1](pi<dim>(i+1, j, k+h)) + 
+              GC[dim-1](pi<dim>(i+1, j, k+h)) +
               GC[dim-1](pi<dim>(i,   j, k+h)) -
-              GC[dim-1](pi<dim>(i+1, j, k-h)) - 
+              GC[dim-1](pi<dim>(i+1, j, k-h)) -
               GC[dim-1](pi<dim>(i,   j, k-h))
             )
           )
@@ -113,4 +113,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_fct_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_fct_1d.hpp
index 544b8ac7..eaa8e2d3 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_fct_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_fct_1d.hpp
@@ -2,7 +2,7 @@
   * @copyright University of Warsaw
   * @author Anna Jaruga <ajaruga@igf.fuw.edu.pl>
   * @author Sylwester Arabas <slayoo@igf.fuw.edu.pl>
-  * @brief Flux Corrected Transport formulae for MPDATA 
+  * @brief Flux Corrected Transport formulae for MPDATA
   *        (aka non-oscillatory, monotonic, sign-preserving option)
   * @section LICENSE
   * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
@@ -13,22 +13,22 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
 namespace libmpdataxx
-{ 
-  namespace formulae 
-  { 
+{
+  namespace formulae
+  {
     namespace mpdata
     {
       using namespace arakawa_c;
 
       /// \f$ \beta^{\uparrow}_{i} = \frac { \psi^{max}_{i}- \psi^{*}_{i} }
-      /// { \sum\limits_{I} \frac{\Delta t}{\Delta x^{I}} \left( [u^{I}_{i-1/2}]^{+} \psi^{*}_{i-1} - 
+      /// { \sum\limits_{I} \frac{\Delta t}{\Delta x^{I}} \left( [u^{I}_{i-1/2}]^{+} \psi^{*}_{i-1} -
       /// [u^{I}_{i+1/2}]^{-} \psi^{*}_{i+1} \right)  } \f$ \n
       /// eq.(19a) in Smolarkiewicz & Grabowski 1990 (J.Comp.Phys.,86,355-375)
 
       template<opts_t opts, class arr_1d_t, class ix_t>
       forceinline_macro auto beta_up_nominator(
         const arr_1d_t &psi,
-        const arr_1d_t &psi_max, 
+        const arr_1d_t &psi_max,
         const arr_1d_t &G,
         const ix_t &i
       )
@@ -36,7 +36,7 @@ namespace libmpdataxx
         return return_helper<ix_t>(
           (max<ix_t>(psi_max(i), psi(i-1), psi(i), psi(i+1)) - psi(i)) * formulae::G<opts>(G, i)
         );
-      }                                                                    //to make beta dimensionless 
+      }                                                                    //to make beta dimensionless
                                                                            //when transporting mixing ratios with momentum
       template <opts_t opts, class arr_1d_t, class flx_t>
       forceinline_macro void beta_up( // for positive sign signal
@@ -51,7 +51,7 @@ namespace libmpdataxx
         using ix_t = int;
         for (int i = ir.first(); i <= ir.last(); ++i)
         {
-          b(i) = 
+          b(i) =
           fct_frac<ix_t>(
             beta_up_nominator<opts>(psi, psi_max, G, i)
             , // ----------------------------
@@ -59,40 +59,40 @@ namespace libmpdataxx
             - negpart<opts, ix_t>(flx[0](i+h))
           );
         }
-      } 
+      }
 
       /// \f$ \beta^{\downarrow}_{i} = \frac { \psi^{*}_{i}- \psi^{min}_{i} }
-      /// { \sum\limits_{I} \frac{\Delta t}{\Delta x^{I}} \left( [u^{I}_{i+1/2}]^{+} \psi^{*}_{i} - 
+      /// { \sum\limits_{I} \frac{\Delta t}{\Delta x^{I}} \left( [u^{I}_{i+1/2}]^{+} \psi^{*}_{i} -
       /// [u^{I}_{i-1/2}]^{-} \psi^{*}_{i} \right)  } \f$ \n
       /// eq.(19b) in Smolarkiewicz & Grabowski 1990 (J.Comp.Phys.,86,355-375)
 
       template<opts_t opts, class arr_1d_t, class ix_t>
       forceinline_macro auto beta_dn_nominator(
         const arr_1d_t &psi,
-        const arr_1d_t &psi_min, 
-        const arr_1d_t &G, 
+        const arr_1d_t &psi_min,
+        const arr_1d_t &G,
         const ix_t &i
       )
       {
         return return_helper<ix_t>(
           (psi(i) - min<ix_t>(psi_min(i), psi(i-1), psi(i), psi(i+1))) * formulae::G<opts>(G, i)
         );
-      }                                                                      //to make beta dimensionless 
+      }                                                                      //to make beta dimensionless
                                                                              //when transporting mixing ratios with momentum
       template <opts_t opts, class arr_1d_t, class flx_t>
       forceinline_macro void beta_dn( //positive sign signal
-        arr_1d_t &b, 
-        const arr_1d_t &psi, 
+        arr_1d_t &b,
+        const arr_1d_t &psi,
         const arr_1d_t &psi_min, // from before the first iteration
-        const flx_t &flx, 
-        const arr_1d_t &G, 
+        const flx_t &flx,
+        const arr_1d_t &G,
         const rng_t &ir
       )
       {
         using ix_t = int;
         for (int i = ir.first(); i <= ir.last(); ++i)
         {
-          b(i) = 
+          b(i) =
           fct_frac<ix_t>(
             beta_dn_nominator<opts>(psi, psi_min, G, i)
             , // --------------------------
@@ -100,10 +100,10 @@ namespace libmpdataxx
             - negpart<opts, ix_t>(flx[0](i-h))
           );
         }
-      } 
+      }
 
       /// nonoscillatory antidiffusive velocity: \n
-      /// \f$ U^{MON}_{i+1/2}=min(1,\beta ^{\downarrow}_i,\beta ^{\uparrow} _{i+1})[U_{i+1/2}]^{+} 
+      /// \f$ U^{MON}_{i+1/2}=min(1,\beta ^{\downarrow}_i,\beta ^{\uparrow} _{i+1})[U_{i+1/2}]^{+}
       /// + min(1,\beta^{\uparrow}_{i},\beta^{\downarrow}_{i+1/2})[u_{i+1/2}]^{-} \f$ \n
       /// where<ix_t> \f$ [\cdot]^{+}=max(\cdot,0) \f$ and \f$ [\cdot]^{-}=min(\cdot,0) \f$ \n
       /// eq.(18) in Smolarkiewicz & Grabowski 1990 (J.Comp.Phys.,86,355-375)
@@ -132,32 +132,32 @@ namespace libmpdataxx
                psi(i) > 0,
                // then
                min<ix_t>(1,
-                 beta_dn(i    ), 
+                 beta_dn(i    ),
                  beta_up(i + 1)
-               ), 
+               ),
                // else
                min<ix_t>(1,
-                 beta_up(i    ), 
+                 beta_up(i    ),
                  beta_dn(i + 1)
-               )  
-             ),  
+               )
+             ),
              // else
              where<ix_t>(
                // if
                psi(i+1) > 0,
                // then
                min<ix_t>(1,
-                 beta_up(i    ), 
+                 beta_up(i    ),
                  beta_dn(i + 1)
-               ), 
+               ),
                // else
                min<ix_t>(1,
-                 beta_dn(i   ), 
+                 beta_dn(i   ),
                  beta_up(i + 1)
-               )  
-             )   
+               )
+             )
           );
-        } 
+        }
       }
 
       template <opts_t opts, class arr_1d_t>
@@ -175,7 +175,7 @@ namespace libmpdataxx
         using ix_t = int;
         for (int i = ir.first(); i <= ir.last(); ++i)
         {
-          GC_m(i+h) = 
+          GC_m(i+h) =
           GC_corr(i+h) * where<ix_t>(
             // if
             GC_corr(i+h) > 0,
@@ -183,15 +183,15 @@ namespace libmpdataxx
             min<ix_t>(1,
               beta_dn(i),
               beta_up(i + 1)
-            ), 
+            ),
             // else
             min<ix_t>(1,
               beta_up(i),
               beta_dn(i + 1)
-            )  
+            )
           );
         }
-      }  
+      }
     } // namespace mpdata_fct
   } // namespace formulae
 } // namespcae libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_fct_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_fct_2d.hpp
index 40b1239d..dd905a21 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_fct_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_fct_2d.hpp
@@ -10,25 +10,25 @@
 #include <boost/preprocessor/punctuation/comma.hpp>
 
 namespace libmpdataxx
-{ 
-  namespace formulae 
-  { 
+{
+  namespace formulae
+  {
     namespace mpdata
     {
       using namespace arakawa_c;
- 
+
       //see Smolarkiewicz & Grabowski 1990 (J.Comp.Phys.,86,355-375)
       template <opts_t opts, class arr_2d_t, class ix_t>
       forceinline_macro auto beta_up_nominator(
         const arr_2d_t &psi,
         const arr_2d_t &psi_max,
         const arr_2d_t &G,
-        const ix_t &i,  
+        const ix_t &i,
         const ix_t &j
       )
       {
         return return_helper<ix_t>(
-          (  
+          (
             max<ix_t>(     psi_max(i, j),
                            psi(i, j+1),
               psi(i-1, j), psi(i, j  ), psi(i+1, j),
@@ -45,7 +45,7 @@ namespace libmpdataxx
         const arr_2d_t &psi_max, // from before the first iteration
         const flx_t &flx,
         const arr_2d_t &G,
-        const rng_t &ir,  
+        const rng_t &ir,
         const rng_t &jr
       )
       {
@@ -54,48 +54,48 @@ namespace libmpdataxx
         {
           for (int j = jr.first(); j <= jr.last(); ++j)
           {
-            b(i, j) = 
+            b(i, j) =
             fct_frac<ix_t>(
               beta_up_nominator<opts>(psi, psi_max, G, i, j)
               , // -----------------------------------------------------------
               ( pospart<opts, ix_t>(flx[0](i-h, j))
               - negpart<opts, ix_t>(flx[0](i+h, j)) )  // additional parenthesis so that we first sum
-              +                                                        // fluxes in separate dimensions 
+              +                                                        // fluxes in separate dimensions
               ( pospart<opts, ix_t>(flx[1](i, j-h))    // could be important for accuracy if one of them
               - negpart<opts, ix_t>(flx[1](i, j+h)) )  // is of different magnitude than the other
             );
           }
         }
-      } 
+      }
 
       template <opts_t opts, class arr_2d_t, class ix_t>
       forceinline_macro auto beta_dn_nominator(
-        const arr_2d_t &psi, 
+        const arr_2d_t &psi,
         const arr_2d_t &psi_min,
-        const arr_2d_t &G, 
+        const arr_2d_t &G,
         const ix_t &i,
-        const ix_t &j 
+        const ix_t &j
       )
       {
         return return_helper<ix_t>(
           (
             psi(i, j)
-            - min<ix_t>(              psi_min(i,j), 
+            - min<ix_t>(              psi_min(i,j),
                                       psi(i, j+1),
                          psi(i-1, j), psi(i, j  ), psi(i+1, j),
                                       psi(i, j-1)
             )
           ) * formulae::G<opts, 0>(G, i, j)  //see beta_up_nominator
         );
-      } 
+      }
 
       template <opts_t opts, class arr_2d_t, class flx_t>
       forceinline_macro auto beta_dn(
-        arr_2d_t &b, 
-        const arr_2d_t &psi, 
+        arr_2d_t &b,
+        const arr_2d_t &psi,
         const arr_2d_t &psi_min, // from before the first iteration
         const flx_t &flx,
-        const arr_2d_t &G, 
+        const arr_2d_t &G,
         const rng_t &ir,
         const rng_t &jr
       )
@@ -105,7 +105,7 @@ namespace libmpdataxx
         {
           for (int j = jr.first(); j <= jr.last(); ++j)
           {
-            b(i, j) = 
+            b(i, j) =
             fct_frac<ix_t>(
               beta_dn_nominator<opts>(psi, psi_min, G, i, j)
               , // ---------------------------------------------------------
@@ -117,7 +117,7 @@ namespace libmpdataxx
             );
           }
         }
-      } 
+      }
 
       template <opts_t opts, int d, class arr_2d_t>
       forceinline_macro auto GC_mono( //for variable-sign signal and no infinite gauge option
@@ -148,7 +148,7 @@ namespace libmpdataxx
                 // then
                 min<ix_t>(1,
                   beta_dn(pi<d>(i,     j)),
-                  beta_up(pi<d>(i + 1, j)) 
+                  beta_up(pi<d>(i + 1, j))
                 ),
                 // else
                 min<ix_t>(1,
@@ -174,7 +174,7 @@ namespace libmpdataxx
             );
           }
         }
-      } 
+      }
 
       template <opts_t opts, int d, class arr_2d_t>
       forceinline_macro auto GC_mono( //for infinite gauge option or positive-sign signal
@@ -197,7 +197,7 @@ namespace libmpdataxx
             GC_m[d]( pi<d>(i+h, j) ) =
             GC_corr[d]( pi<d>(i+h, j) ) * where<ix_t>(
               // if
-              GC_corr[d]( pi<d>(i+h, j) ) > 0, 
+              GC_corr[d]( pi<d>(i+h, j) ) > 0,
               // then
               min<ix_t>(1,
                 beta_dn(pi<d>(i,     j)),
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_fct_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_fct_3d.hpp
index def743fe..44e97c25 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_fct_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_fct_3d.hpp
@@ -23,7 +23,7 @@ namespace libmpdataxx
         const arr_3d_t &psi,
         const arr_3d_t &psi_max,
         const arr_3d_t &G,
-        const ix_t &i, 
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
@@ -42,7 +42,7 @@ namespace libmpdataxx
           - psi(i, j, k)
           ) * formulae::G<opts, 0>(G, i, j, k) //to make beta up dimensionless when transporting mixing ratios with momentum
         );
-      } 
+      }
 
       template <opts_t opts, class arr_3d_t, class flx_t>
       forceinline_macro void beta_up(
@@ -51,7 +51,7 @@ namespace libmpdataxx
         const arr_3d_t &psi_max, // from before the first iteration
         const flx_t &flx,
         const arr_3d_t &G,
-        const rng_t &ir, 
+        const rng_t &ir,
         const rng_t &jr,
         const rng_t &kr
       )
@@ -63,7 +63,7 @@ namespace libmpdataxx
           {
             for (int k = kr.first(); k <= kr.last(); ++k)
             {
-              b(i, j, k) = 
+              b(i, j, k) =
               fct_frac<ix_t>(
                 beta_up_nominator<opts>(psi, psi_max, G, i, j, k)
                 , //-------------------------------------------------------------------------------------
@@ -106,7 +106,7 @@ namespace libmpdataxx
           ) * formulae::G<opts, 0>(G, i, j, k) //to make beta up dimensionless when transporting mixing ratios with momentum
         );
       }
-      
+
       template <opts_t opts, class arr_3d_t, class flx_t>
       forceinline_macro void beta_dn(
         arr_3d_t &b,
@@ -126,7 +126,7 @@ namespace libmpdataxx
           {
             for (int k = kr.first(); k <= kr.last(); ++k)
             {
-              b(i, j, k) = 
+              b(i, j, k) =
               fct_frac<ix_t>(
                 beta_dn_nominator<opts>(psi, psi_min, G, i, j, k)
                 , //-----------------------------------------------------------------------------------
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_1d.hpp
index 67341b41..4e15475d 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_1d.hpp
@@ -13,18 +13,18 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_gc_1d.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_1d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
-      // flux divergence i.e. 
+      // flux divergence i.e.
       // 1 / G * dx * d(GC * psi)/dx at (i)
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto fdiv_centre(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i
       )
@@ -36,13 +36,13 @@ namespace libmpdataxx
           ) / formulae::G<opts>(G, i)
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * dx * d(GC * psi)/dx at (i+1/2) - positive sign scalar version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
@@ -61,13 +61,13 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * dx * d(GC * psi)/dx at (i+1/2) - variable sign scalar version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
@@ -86,13 +86,13 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * dx * d(GC * psi)/dx at (i+1/2) - infinite gauge version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -105,13 +105,13 @@ namespace libmpdataxx
           ) / G_bar_x<opts>(G, i)
         );
       }
-     
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * dx * d(GC * fdiv)/dx at (i+1/2) - positive sign scalar version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
@@ -132,13 +132,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * dx * d(GC * fdiv)/dx at (i+1/2) - variable sign scalar version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
@@ -159,13 +159,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * dx * d(GC * fdiv)/dx at (i+1/2) - infinite gauge version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -178,20 +178,20 @@ namespace libmpdataxx
           ) / G_bar_x<opts>(G, i)
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2) - positive sign scalar version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto ndx_fdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          4 * 
+          4 *
           frac<opts, ix_t>(
             fdiv_centre<opts>(psi, GC, G, i+1)
           - fdiv_centre<opts>(psi, GC, G, i  )
@@ -203,20 +203,20 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2) - variable sign scalar version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto ndx_fdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          4 * 
+          4 *
           frac<opts, ix_t>(
             fdiv_centre<opts>(psi, GC, G, i+1)
           - fdiv_centre<opts>(psi, GC, G, i  )
@@ -228,13 +228,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2) - infinite gauge version
       template <opts_t opts, class arr_1d_t, class ix_t, class arrvec_t>
       forceinline_macro auto ndx_fdiv(
         const arr_1d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -247,4 +247,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_2d.hpp
index 4755f640..9283a0b1 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_2d.hpp
@@ -16,18 +16,18 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_gc_2d.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
-      // flux divergence i.e. 
+      // flux divergence i.e.
       // 1 / G * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy) at (i, j)
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto fdiv_centre(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j
@@ -42,13 +42,13 @@ namespace libmpdataxx
           ) / formulae::G<opts, dim>(G, i, j)
         );
       }
-      
-      // flux divergence i.e. 
+
+      // flux divergence i.e.
       // 1 / G * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy) at (i+1/2, j+1/2)
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto fdiv_corner_xy(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j
@@ -58,19 +58,19 @@ namespace libmpdataxx
           (
               GC0_bar_xy<dim>(GC[dim], i+1, j  ) * psi_bar_y<opts, dim>(psi, i+1, j)
             - GC0_bar_xy<dim>(GC[dim], i  , j  ) * psi_bar_y<opts, dim>(psi, i  , j)
-              
+
             + GC1_bar_xy<dim>(GC[dim+1], i, j+1) * psi_bar_x<opts, dim>(psi, i, j+1)
             - GC1_bar_xy<dim>(GC[dim+1], i, j  ) * psi_bar_x<opts, dim>(psi, i, j  )
           ) / G_bar_xy<opts, dim>(G, i, j)
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy) at (i+1/2, j) - positive sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -96,13 +96,13 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy) at (i+1/2, j) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -128,18 +128,18 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy) at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
           (
@@ -150,13 +150,13 @@ namespace libmpdataxx
           ) / G_bar_x<opts, dim>(G, i, j)
         );
       }
-     
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * (dx * d(GC * fdiv)/dx + dy * d(GC * fdiv)/dy) at (i+1/2, j) - positive sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -184,18 +184,18 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * (dx * d(GC * fdiv)/dx + dy * d(GC * fdiv)/dy) at (i+1/2, j) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
           8 *
@@ -218,13 +218,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * (dx * d(GC * fdiv)/dx + dy * d(GC * fdiv)/dy) at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -240,13 +240,13 @@ namespace libmpdataxx
           ) / G_bar_x<opts, dim>(G, i, j)
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2, j) - positive sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto ndx_fdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -254,7 +254,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          8 * 
+          8 *
           frac<opts, ix_t>(
             fdiv_centre<opts, dim>(psi, GC, G, i+1, j)
           - fdiv_centre<opts, dim>(psi, GC, G, i  , j)
@@ -270,13 +270,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2, j) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto ndx_fdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -284,7 +284,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          8 * 
+          8 *
           frac<opts, ix_t>(
             fdiv_centre<opts, dim>(psi, GC, G, i+1, j)
           - fdiv_centre<opts, dim>(psi, GC, G, i  , j)
@@ -300,13 +300,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto ndx_fdiv(
         const arr_2d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -319,13 +319,13 @@ namespace libmpdataxx
         );
       }
 
-      // nondimensionalised flux divergence of time derivative of psi i.e. 
+      // nondimensionalised flux divergence of time derivative of psi i.e.
       // 1 / (G * psi) * (dx * d(GC * dpsi/dt)/dx + dy * d(GC * dpsi/dt)/dy) at (i+1/2, j) - positive sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv_dt(
         const arr_2d_t &psi_np1,
         const arr_2d_t &psi_n,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -359,14 +359,14 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence of time derivative of psi i.e. 
+
+      // nondimensionalised flux divergence of time derivative of psi i.e.
       // 1 / (G * psi) * (dx * d(GC * dpsi/dt)/dx + dy * d(GC * dpsi/dt)/dy) at (i+1/2, j) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv_dt(
         const arr_2d_t &psi_np1,
         const arr_2d_t &psi_n,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -400,14 +400,14 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence of time derivative of psi i.e. 
+
+      // nondimensionalised flux divergence of time derivative of psi i.e.
       // 1 / (G * psi) * (dx * d(GC * dpsi/dt)/dx + dy * d(GC * dpsi/dt)/dy) at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class arrvec_t, class ix_t>
       forceinline_macro auto nfdiv_dt(
         const arr_2d_t &psi_np1,
         const arr_2d_t &psi_n,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -428,4 +428,4 @@ namespace libmpdataxx
 
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_3d.hpp
index df8f3637..26d013b0 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_fdiv_3d.hpp
@@ -17,18 +17,18 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_gc_3d.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_3d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
-      // flux divergence i.e. 
+      // flux divergence i.e.
       // 1 / G * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy + dz * d(GC * psi)/dz) at (i, j, k)
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto fdiv_centre(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -48,13 +48,13 @@ namespace libmpdataxx
           ) / formulae::G<opts, dim>(G, i, j, k)
         );
       }
-      
-      // flux divergence i.e. 
+
+      // flux divergence i.e.
       // 1 / G * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy + dz * d(GC * psi)/dz) at (i+1/2, j+1/2, k)
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto fdiv_corner_xy(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -65,22 +65,22 @@ namespace libmpdataxx
           (
               GC0_bar_xy<dim>(GC[dim  ], i+1, j, k) * psi_bar_y<opts, dim>(psi, i+1, j, k)
             - GC0_bar_xy<dim>(GC[dim  ], i  , j, k) * psi_bar_y<opts, dim>(psi, i  , j, k)
-              
+
             + GC1_bar_xy<dim>(GC[dim+1], i, j+1, k) * psi_bar_x<opts, dim>(psi, i, j+1, k)
             - GC1_bar_xy<dim>(GC[dim+1], i, j  , k) * psi_bar_x<opts, dim>(psi, i, j  , k)
-            
+
             + GC2_bar_xy<dim>(GC[dim-1], i, j, k  ) * psi_bar_xyz<opts, dim>(psi, i, j , k  )
             - GC2_bar_xy<dim>(GC[dim-1], i, j, k-1) * psi_bar_xyz<opts, dim>(psi, i, j , k-1)
           ) / G_bar_xy<opts, dim>(G, i, j, k)
         );
       }
-      
-      // flux divergence i.e. 
+
+      // flux divergence i.e.
       // 1 / G * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy + dz * d(GC * psi)/dz) at (i+1/2, j, k+1/2)
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto fdiv_corner_xz(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -91,23 +91,23 @@ namespace libmpdataxx
           (
               GC0_bar_xz<dim>(GC[dim  ], i+1, j, k) * psi_bar_z<opts, dim>(psi, i+1, j, k)
             - GC0_bar_xz<dim>(GC[dim  ], i  , j, k) * psi_bar_z<opts, dim>(psi, i  , j, k)
-              
+
             + GC1_bar_xz<dim>(GC[dim+1], i, j  , k) * psi_bar_xyz<opts, dim>(psi, i, j  , k)
             - GC1_bar_xz<dim>(GC[dim+1], i, j-1, k) * psi_bar_xyz<opts, dim>(psi, i, j-1, k)
-            
+
             + GC2_bar_xz<dim>(GC[dim-1], i, j, k+1) * psi_bar_x<opts, dim>(psi, i, j , k+1)
             - GC2_bar_xz<dim>(GC[dim-1], i, j, k  ) * psi_bar_x<opts, dim>(psi, i, j , k  )
           ) / G_bar_xz<opts, dim>(G, i, j, k)
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy + dz * d(GC * psi)/dz) at (i+1/2, j, k)
       // positive sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -123,7 +123,7 @@ namespace libmpdataxx
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) * psi_bar_xy<opts, dim>(psi, i, j  , k)
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) * psi_bar_xy<opts, dim>(psi, i, j-1, k)
-          
+
           + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * psi_bar_xz<opts, dim>(psi, i, j, k  )
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) * psi_bar_xz<opts, dim>(psi, i, j, k-1)
           ,
@@ -142,14 +142,14 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy + dz * d(GC * psi)/dz) at (i+1/2, j, k)
       // variable sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -165,7 +165,7 @@ namespace libmpdataxx
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) * psi_bar_xy<opts, dim>(psi, i, j  , k)
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) * psi_bar_xy<opts, dim>(psi, i, j-1, k)
-          
+
           + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * psi_bar_xz<opts, dim>(psi, i, j, k  )
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) * psi_bar_xz<opts, dim>(psi, i, j, k-1)
           ,
@@ -184,14 +184,14 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence i.e. 
+
+      // nondimensionalised flux divergence i.e.
       // 1 / (G * psi) * (dx * d(GC * psi)/dx + dy * d(GC * psi)/dy + dz * d(GC * psi)/dz) at (i+1/2, j, k)
       // infinite gauge version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -206,20 +206,20 @@ namespace libmpdataxx
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) * psi_bar_xy<opts, dim>(psi, i, j  , k)
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) * psi_bar_xy<opts, dim>(psi, i, j-1, k)
-          
+
           + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * psi_bar_xz<opts, dim>(psi, i, j, k  )
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) * psi_bar_xz<opts, dim>(psi, i, j, k-1)
           ) / G_bar_x<opts, dim>(G, i, j, k)
         );
       }
-      
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * (dx * d(GC * fdiv)/dx + dy * d(GC * fdiv)/dy + dz * d(GC * fdiv)/dz) at (i+1/2, j, k)
       // positive sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -235,7 +235,7 @@ namespace libmpdataxx
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) * fdiv_corner_xy<opts, dim>(psi, GC, G, i, j  , k)
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) * fdiv_corner_xy<opts, dim>(psi, GC, G, i, j-1, k)
-          
+
           + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * fdiv_corner_xz<opts, dim>(psi, GC, G, i, j, k  )
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) * fdiv_corner_xz<opts, dim>(psi, GC, G, i, j, k-1)
           ,
@@ -256,14 +256,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * (dx * d(GC * fdiv)/dx + dy * d(GC * fdiv)/dy + dz * d(GC * fdiv)/dz) at (i+1/2, j, k)
       // variable sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -279,7 +279,7 @@ namespace libmpdataxx
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) * fdiv_corner_xy<opts, dim>(psi, GC, G, i, j  , k)
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) * fdiv_corner_xy<opts, dim>(psi, GC, G, i, j-1, k)
-          
+
           + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * fdiv_corner_xz<opts, dim>(psi, GC, G, i, j, k  )
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) * fdiv_corner_xz<opts, dim>(psi, GC, G, i, j, k-1)
           ,
@@ -300,14 +300,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised divergence of flux divergence flux i.e.
       // 1 / (G * psi) * (dx * d(GC * fdiv)/dx + dy * d(GC * fdiv)/dy + dz * d(GC * fdiv)/dz) at (i+1/2, j, k)
       // infinite gauge version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_fdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -322,19 +322,19 @@ namespace libmpdataxx
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) * fdiv_corner_xy<opts, dim>(psi, GC, G, i, j  , k)
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) * fdiv_corner_xy<opts, dim>(psi, GC, G, i, j-1, k)
-          
+
           + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * fdiv_corner_xz<opts, dim>(psi, GC, G, i, j, k  )
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) * fdiv_corner_xz<opts, dim>(psi, GC, G, i, j, k-1)
           ) / G_bar_x<opts, dim>(G, i, j, k)
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2, j, k) - positive sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto ndx_fdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -343,7 +343,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          12 * 
+          12 *
           frac<opts, ix_t>(
             fdiv_centre<opts, dim>(psi, GC, G, i+1, j, k)
           - fdiv_centre<opts, dim>(psi, GC, G, i  , j, k)
@@ -363,13 +363,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2, j, k) - variable sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto ndx_fdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -378,7 +378,7 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          12 * 
+          12 *
           frac<opts, ix_t>(
             fdiv_centre<opts, dim>(psi, GC, G, i+1, j, k)
           - fdiv_centre<opts, dim>(psi, GC, G, i  , j, k)
@@ -398,13 +398,13 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised x derivative of flux divergence i.e.
       // 1 / (G * psi) * dx * d(fdiv)/dx at (i+1/2, j, k) - infinite gauge version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto ndx_fdiv(
         const arr_3d_t &psi,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -417,15 +417,15 @@ namespace libmpdataxx
           - fdiv_centre<opts, dim>(psi, GC, G, i  , j, k)
         );
       }
-      
-      // nondimensionalised flux divergence of time derivative of psi i.e. 
+
+      // nondimensionalised flux divergence of time derivative of psi i.e.
       // 1 / (G * psi) * (dx * d(GC * dpsi/dt)/dx + dy * d(GC * dpsi/dt)/dy + dz * d(GC * dpsi/dt)/dz) at (i+1/2, j, k)
       // positive sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_dt(
         const arr_3d_t &psi_np1,
         const arr_3d_t &psi_n,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -442,11 +442,11 @@ namespace libmpdataxx
             (psi_np1(pi<dim>(i  , j, k)) - psi_n(pi<dim>(i  , j, k)))
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) *
-            (psi_bar_xy<opts, dim>(psi_np1, i, j, k) - psi_bar_xy<opts, dim>(psi_n, i, j, k)) 
+            (psi_bar_xy<opts, dim>(psi_np1, i, j, k) - psi_bar_xy<opts, dim>(psi_n, i, j, k))
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) *
-            (psi_bar_xy<opts, dim>(psi_np1, i, j-1, k) - psi_bar_xy<opts, dim>(psi_n, i, j-1, k)) 
-          
-          + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * 
+            (psi_bar_xy<opts, dim>(psi_np1, i, j-1, k) - psi_bar_xy<opts, dim>(psi_n, i, j-1, k))
+
+          + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) *
             (psi_bar_xz<opts, dim>(psi_np1, i, j, k)- psi_bar_xz<opts, dim>(psi_n, i, j, k))
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) *
             (psi_bar_xz<opts, dim>(psi_np1, i, j, k-1) - psi_bar_xz<opts, dim>(psi_n, i, j, k-1))
@@ -477,14 +477,14 @@ namespace libmpdataxx
         );
       }
 
-      // nondimensionalised flux divergence of time derivative of psi i.e. 
+      // nondimensionalised flux divergence of time derivative of psi i.e.
       // 1 / (G * psi) * (dx * d(GC * dpsi/dt)/dx + dy * d(GC * dpsi/dt)/dy + dz * d(GC * dpsi/dt)/dz) at (i+1/2, j, k)
       // variable sign scalar version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_dt(
         const arr_3d_t &psi_np1,
         const arr_3d_t &psi_n,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -501,11 +501,11 @@ namespace libmpdataxx
             (abs(psi_np1(pi<dim>(i  , j, k))) - abs(psi_n(pi<dim>(i  , j, k))))
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) *
-            (psi_bar_xy<opts, dim>(psi_np1, i, j, k) - psi_bar_xy<opts, dim>(psi_n, i, j, k)) 
+            (psi_bar_xy<opts, dim>(psi_np1, i, j, k) - psi_bar_xy<opts, dim>(psi_n, i, j, k))
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) *
-            (psi_bar_xy<opts, dim>(psi_np1, i, j-1, k) - psi_bar_xy<opts, dim>(psi_n, i, j-1, k)) 
-          
-          + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * 
+            (psi_bar_xy<opts, dim>(psi_np1, i, j-1, k) - psi_bar_xy<opts, dim>(psi_n, i, j-1, k))
+
+          + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) *
             (psi_bar_xz<opts, dim>(psi_np1, i, j, k)- psi_bar_xz<opts, dim>(psi_n, i, j, k))
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) *
             (psi_bar_xz<opts, dim>(psi_np1, i, j, k-1) - psi_bar_xz<opts, dim>(psi_n, i, j, k-1))
@@ -535,15 +535,15 @@ namespace libmpdataxx
           )
         );
       }
-      
-      // nondimensionalised flux divergence of time derivative of psi i.e. 
+
+      // nondimensionalised flux divergence of time derivative of psi i.e.
       // 1 / (G * psi) * (dx * d(GC * dpsi/dt)/dx + dy * d(GC * dpsi/dt)/dy + dz * d(GC * dpsi/dt)/dz) at (i+1/2, j, k)
       // infinite gauge version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t, class arrvec_t>
       forceinline_macro auto nfdiv_dt(
         const arr_3d_t &psi_np1,
         const arr_3d_t &psi_n,
-        const arrvec_t &GC, 
+        const arrvec_t &GC,
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -559,11 +559,11 @@ namespace libmpdataxx
             (psi_np1(pi<dim>(i  , j, k)) - psi_n(pi<dim>(i  , j, k)))
 
           + GC1_bar_x<dim>(GC[dim+1], i, j  , k) *
-            (psi_bar_xy<opts, dim>(psi_np1, i, j, k) - psi_bar_xy<opts, dim>(psi_n, i, j, k)) 
+            (psi_bar_xy<opts, dim>(psi_np1, i, j, k) - psi_bar_xy<opts, dim>(psi_n, i, j, k))
           - GC1_bar_x<dim>(GC[dim+1], i, j-1, k) *
-            (psi_bar_xy<opts, dim>(psi_np1, i, j-1, k) - psi_bar_xy<opts, dim>(psi_n, i, j-1, k)) 
-          
-          + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) * 
+            (psi_bar_xy<opts, dim>(psi_np1, i, j-1, k) - psi_bar_xy<opts, dim>(psi_n, i, j-1, k))
+
+          + GC2_bar_x<dim>(GC[dim-1], i, j, k  ) *
             (psi_bar_xz<opts, dim>(psi_np1, i, j, k)- psi_bar_xz<opts, dim>(psi_n, i, j, k))
           - GC2_bar_x<dim>(GC[dim-1], i, j, k-1) *
             (psi_bar_xz<opts, dim>(psi_np1, i, j, k-1) - psi_bar_xz<opts, dim>(psi_n, i, j, k-1))
@@ -572,4 +572,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_g_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_g_1d.hpp
index 58761eed..aaa30e91 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_g_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_g_1d.hpp
@@ -10,15 +10,15 @@
 
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of G to (i+1/2) - general case
       template<opts_t opts, class arr_1d_t, class ix_t>
-      inline auto G_bar_x( 
+      inline auto G_bar_x(
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::nug)>::type* = 0
@@ -37,10 +37,10 @@ namespace libmpdataxx
         const arr_1d_t &G,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::nug)>::type* = 0
-      ) 
+      )
       {
         return 1;
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp
index 4ebefaf8..44932ea6 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp
@@ -13,21 +13,21 @@
 
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of G to (i+1/2, j) - general case
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
-      inline auto G_bar_x( 
+      inline auto G_bar_x(
         const arr_2d_t &G,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::nug)>::type* = 0
-      ) 
-      { 
+      )
+      {
         return return_helper<ix_t>(
           (
             formulae::G<opts, dim>(G, i+1, j) + formulae::G<opts, dim>(G, i, j)
@@ -45,7 +45,7 @@ namespace libmpdataxx
       ) {
         return 1;
       }
-      
+
       // interpolation of G to (i+1/2, j+1/2) - general case
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto G_bar_xy(
@@ -54,7 +54,7 @@ namespace libmpdataxx
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::nug)>::type* = 0
       )
-      { 
+      {
         return return_helper<ix_t>(
           (
             formulae::G<opts, dim>(G, i  , j  ) +
@@ -64,7 +64,7 @@ namespace libmpdataxx
           ) / 4
         );
       }
-      
+
       // interpolation of G to (i+1/2, j+1/2) - constant G version
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto G_bar_xy(
@@ -77,4 +77,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_g_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_g_3d.hpp
index 213b5b77..916e8112 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_g_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_g_3d.hpp
@@ -14,11 +14,11 @@
 
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of G to (i+1/2, j, k) - general case
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
@@ -40,7 +40,7 @@ namespace libmpdataxx
 
       // interpolation of G to (i+1/2, j, k) - constant G version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto G_bar_x( 
+      inline auto G_bar_x(
         const arr_3d_t &G,
         const ix_t &i,
         const ix_t &j,
@@ -50,7 +50,7 @@ namespace libmpdataxx
       {
           return 1;
       }
-      
+
       // interpolation of G to (i+1/2, j+1/2, k) - general case
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto G_bar_xy(
@@ -70,7 +70,7 @@ namespace libmpdataxx
           ) / 4
         );
       }
-      
+
       // interpolation of G to (i+1/2, j+1/2, k) - constant G version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto G_bar_xy(
@@ -83,7 +83,7 @@ namespace libmpdataxx
       {
         return 1;
       }
-      
+
       // interpolation of G to (i+1/2, j, k+1/2) - general case
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto G_bar_xz(
@@ -103,7 +103,7 @@ namespace libmpdataxx
           ) / 4
         );
       }
-      
+
       // interpolation of G to (i+1/2, j, k+1/2) - constant G version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto G_bar_xz(
@@ -117,4 +117,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_gc_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_gc_1d.hpp
index 5d66690d..7dd5694b 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_gc_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_gc_1d.hpp
@@ -11,15 +11,15 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of GC[0] to (i)
       template<class arr_1d_t, class ix_t>
-      inline auto GC0_bar_x( 
+      inline auto GC0_bar_x(
         const arr_1d_t &GC,
         const ix_t &i
       )
@@ -35,7 +35,7 @@ namespace libmpdataxx
       // dx * dGC[0]/dx at (i+1/2)
       template <class arr_1d_t, class ix_t>
       inline auto ndx_GC0(
-        const arr_1d_t &GC, 
+        const arr_1d_t &GC,
         const ix_t &i
       )
       {
@@ -43,13 +43,13 @@ namespace libmpdataxx
           (GC(i+h+1) - GC(i+h-1)) / 2
         );
       }
-      
+
       // nondimensionalised xx derivative of GC[0] i.e.
       // dx^2 * dGC[0]/dxx at (i+1/2) - general case
       template <opts_t opts, class arr_1d_t, class ix_t>
       inline auto ndxx_GC0(
-        const arr_1d_t &psi, 
-        const arr_1d_t &GC, 
+        const arr_1d_t &psi,
+        const arr_1d_t &GC,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga)>::type* = 0
       )
@@ -58,29 +58,29 @@ namespace libmpdataxx
           GC(i+h+1) + GC(i+h-1) - 2 * GC(i+h)
         );
       }
-      
+
       // nondimensionalised xx derivative of GC[0] i.e.
       // dx^2 * dGC[0]/dxx at (i+1/2) - infinite gauge version
       template <opts_t opts, class arr_1d_t, class ix_t>
       inline auto ndxx_GC0(
-        const arr_1d_t &psi, 
-        const arr_1d_t &GC, 
+        const arr_1d_t &psi,
+        const arr_1d_t &GC,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          (GC(i+h+1) + GC(i+h-1) - 2 * GC(i+h)) * 
+          (GC(i+h+1) + GC(i+h-1) - 2 * GC(i+h)) *
            psi_bar_x<opts>(psi, i)
         );
       }
-      
+
       // nondimensionalised tt derivative of GC[0] i.e.
       // dt^2 * dGC[0]/dtt at (i+1/2) - general case
       template <opts_t opts, class arr_1d_t, class ix_t>
       inline auto ndtt_GC0(
-        const arr_1d_t &psi, 
-        const arr_1d_t &ndtt_GC, 
+        const arr_1d_t &psi,
+        const arr_1d_t &ndtt_GC,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga)>::type* = 0
       )
@@ -89,13 +89,13 @@ namespace libmpdataxx
           ndtt_GC(i+h) + 0
         );
       }
-      
+
       // nondimensionalised tt derivative of GC[0] i.e.
       // dGC[0]/dtt at (i+1/2) - infinite-gauge version
       template <opts_t opts, class arr_1d_t, class ix_t>
       inline auto ndtt_GC0(
-        const arr_1d_t &psi, 
-        const arr_1d_t &ndtt_GC, 
+        const arr_1d_t &psi,
+        const arr_1d_t &ndtt_GC,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
@@ -106,4 +106,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_gc_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_gc_2d.hpp
index 3671b3e4..3b58c9b2 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_gc_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_gc_2d.hpp
@@ -14,15 +14,15 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of GC[0] to (i, j)
       template <int dim, class arr_2d_t, class ix_t>
-      inline auto GC0_bar_x( 
+      inline auto GC0_bar_x(
         const arr_2d_t &GC,
         const ix_t &i,
         const ix_t &j
@@ -38,26 +38,26 @@ namespace libmpdataxx
       // interpolation of GC[0] to (i, j+1/2)
       template <int dim, class arr_2d_t, class ix_t>
       inline auto GC0_bar_xy(
-        const arr_2d_t &GC, 
-        const ix_t &i, 
+        const arr_2d_t &GC,
+        const ix_t &i,
         const ix_t &j
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+h, j  )) + 
+            GC(pi<dim>(i+h, j  )) +
             GC(pi<dim>(i-h, j  )) +
-            GC(pi<dim>(i+h, j+1)) + 
-            GC(pi<dim>(i-h, j+1)) 
+            GC(pi<dim>(i+h, j+1)) +
+            GC(pi<dim>(i-h, j+1))
           ) / 4
         );
       }
-      
-      
+
+
       // interpolation of GC[1] to (i+1/2, j+1/2)
-      // caution proper call looks like GC1_bar_x<dim>(GC[dim+1], i, j) - note dim vs dim+1 
+      // caution proper call looks like GC1_bar_x<dim>(GC[dim+1], i, j) - note dim vs dim+1
       template <int dim, class arr_2d_t, class ix_t>
-      inline auto GC1_bar_x( 
+      inline auto GC1_bar_x(
         const arr_2d_t &GC,
         const ix_t &i,
         const ix_t &j
@@ -69,22 +69,22 @@ namespace libmpdataxx
           ) / 2
         );
       }
- 
+
       // interpolation of GC[1] to (i+1/2, j)
-      // caution proper call looks like GC1_bar_xy<dim>(GC[dim+1], i, j) - note dim vs dim+1 
+      // caution proper call looks like GC1_bar_xy<dim>(GC[dim+1], i, j) - note dim vs dim+1
       template <int dim, class arr_2d_t, class ix_t>
       inline auto GC1_bar_xy(
-        const arr_2d_t &GC, 
-        const ix_t &i, 
+        const arr_2d_t &GC,
+        const ix_t &i,
         const ix_t &j
       )
-      { 
+      {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+1, j+h)) + 
+            GC(pi<dim>(i+1, j+h)) +
             GC(pi<dim>(i,   j+h)) +
-            GC(pi<dim>(i+1, j-h)) + 
-            GC(pi<dim>(i,   j-h)) 
+            GC(pi<dim>(i+1, j-h)) +
+            GC(pi<dim>(i,   j-h))
           ) / 4
         );
       }
@@ -93,22 +93,22 @@ namespace libmpdataxx
       // dx * dGC[0]/dx at (i+1/2, j)
       template <int dim, class arr_2d_t, class ix_t>
       inline auto ndx_GC0(
-        const arr_2d_t &GC, 
+        const arr_2d_t &GC,
         const ix_t &i,
         const ix_t &j
       )
-      { 
+      {
         return return_helper<ix_t>(
           (GC(pi<dim>(i+h+1, j)) - GC(pi<dim>(i+h-1, j))) / 2
         );
       }
-      
+
       // nondimensionalised xx derivative of GC[0] i.e.
       // dx^2 * dGC[0]/dxx at (i+1/2, j) - general case
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndxx_GC0(
-        const arr_2d_t &psi, 
-        const arr_2d_t &GC, 
+        const arr_2d_t &psi,
+        const arr_2d_t &GC,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga)>::type* = 0
@@ -118,30 +118,30 @@ namespace libmpdataxx
           GC(pi<dim>(i+h+1, j)) + GC(pi<dim>(i+h-1, j)) - 2 * GC(pi<dim>(i+h, j))
         );
       }
-      
+
       // nondimensionalised xx derivative of GC[0] i.e.
       // dx^2 * dGC[0]/dxx at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndxx_GC0(
-        const arr_2d_t &psi, 
-        const arr_2d_t &GC, 
+        const arr_2d_t &psi,
+        const arr_2d_t &GC,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          (GC(pi<dim>(i+h+1, j)) + GC(pi<dim>(i+h-1, j)) - 2 * GC(pi<dim>(i+h, j))) * 
+          (GC(pi<dim>(i+h+1, j)) + GC(pi<dim>(i+h-1, j)) - 2 * GC(pi<dim>(i+h, j))) *
            psi_bar_x<opts, dim>(psi, i, j)
         );
       }
-      
+
       // nondimensionalised tt derivative of GC[0] i.e.
       // dt^2 * dGC[0]/dtt at (i+1/2, j) - general case
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndtt_GC0(
-        const arr_2d_t &psi, 
-        const arr_2d_t &ndtt_GC, 
+        const arr_2d_t &psi,
+        const arr_2d_t &ndtt_GC,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga)>::type* = 0
@@ -151,13 +151,13 @@ namespace libmpdataxx
           ndtt_GC(pi<dim>(i+h, j)) + 0
         );
       }
-      
+
       // nondimensionalised tt derivative of GC[0] i.e.
       // dt^2 * dGC[0]/dtt at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndtt_GC0(
-        const arr_2d_t &psi, 
-        const arr_2d_t &ndtt_GC, 
+        const arr_2d_t &psi,
+        const arr_2d_t &ndtt_GC,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -169,4 +169,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_gc_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_gc_3d.hpp
index f2c3b0fc..1b74d4c6 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_gc_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_gc_3d.hpp
@@ -7,9 +7,9 @@
 // various numerical expressions relating to the advector field GC
 // note that the function naming convention and the comments in this file correspond to dim = 0,
 // if dim = 1 or 2 then you have to mentally perform appropiate substitutions
-// for example for dim = 1 (GC[0], GC[1], GC[2]) -> (GC[1], GC[2], GC[0]), 
+// for example for dim = 1 (GC[0], GC[1], GC[2]) -> (GC[1], GC[2], GC[0]),
 //                         (x, y, z) -> (y, z, x) and (i+1/2, j, k) -> (i, j+1/2, k),
-//             for dim = 2 (GC[0], GC[1], GC[2]) -> (GC[2], GC[0], GC[1]), 
+//             for dim = 2 (GC[0], GC[1], GC[2]) -> (GC[2], GC[0], GC[1]),
 //                         (x, y, z) -> (z, x, y) and (i+1/2, j, k) -> (i, j, k+1/2)
 
 #pragma once
@@ -17,15 +17,15 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of GC[0] to (i, j, k)
       template<int dim, class arr_3d_t, class ix_t>
-      inline auto GC0_bar_x( 
+      inline auto GC0_bar_x(
         const arr_3d_t &GC,
         const ix_t &i,
         const ix_t &j,
@@ -34,27 +34,27 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+h, j, k)) + 
+            GC(pi<dim>(i+h, j, k)) +
             GC(pi<dim>(i-h, j, k))
           ) / 2
         );
       }
-      
+
       // interpolation of GC[0] to (i, j+1/2, k)
       template<int dim, class arr_3d_t, class ix_t>
       inline auto GC0_bar_xy(
-        const arr_3d_t &GC, 
-        const ix_t &i, 
+        const arr_3d_t &GC,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+h, j  , k)) + 
+            GC(pi<dim>(i+h, j  , k)) +
             GC(pi<dim>(i-h, j  , k)) +
-            GC(pi<dim>(i+h, j+1, k)) + 
-            GC(pi<dim>(i-h, j+1, k)) 
+            GC(pi<dim>(i+h, j+1, k)) +
+            GC(pi<dim>(i-h, j+1, k))
           ) / 4
         );
       }
@@ -62,25 +62,25 @@ namespace libmpdataxx
       // interpolation of GC[0] to (i, j, k+1/2)
       template<int dim, class arr_3d_t, class ix_t>
       inline auto GC0_bar_xz(
-        const arr_3d_t &GC, 
-        const ix_t &i, 
+        const arr_3d_t &GC,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+h, j, k  )) + 
+            GC(pi<dim>(i+h, j, k  )) +
             GC(pi<dim>(i-h, j, k  )) +
-            GC(pi<dim>(i+h, j, k+1)) + 
-            GC(pi<dim>(i-h, j, k+1)) 
+            GC(pi<dim>(i+h, j, k+1)) +
+            GC(pi<dim>(i-h, j, k+1))
           ) / 4
         );
       }
 
       // interpolation of GC[1] to (i+1/2, j+1/2, k)
       template<int dim, class arr_3d_t, class ix_t>
-      inline auto GC1_bar_x( 
+      inline auto GC1_bar_x(
         const arr_3d_t &GC,
         const ix_t &i,
         const ix_t &j,
@@ -89,7 +89,7 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+1, j+h, k)) + 
+            GC(pi<dim>(i+1, j+h, k)) +
             GC(pi<dim>(i  , j+h, k))
           ) / 2
         );
@@ -98,53 +98,53 @@ namespace libmpdataxx
       // interpolation of GC[1] to (i+1/2, j, k)
       template<int dim, class arr_3d_t, class ix_t>
       inline auto GC1_bar_xy(
-        const arr_3d_t &GC, 
-        const ix_t &i, 
+        const arr_3d_t &GC,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+1, j+h, k)) + 
+            GC(pi<dim>(i+1, j+h, k)) +
             GC(pi<dim>(i  , j+h, k)) +
-            GC(pi<dim>(i+1, j-h, k)) + 
-            GC(pi<dim>(i  , j-h, k)) 
+            GC(pi<dim>(i+1, j-h, k)) +
+            GC(pi<dim>(i  , j-h, k))
           ) / 4
         );
       }
-      
+
       // interpolation of GC[1] to (i+1/2, j+1/2, k+1/2)
       template<int dim, class arr_3d_t, class ix_t>
       inline auto GC1_bar_xz(
-        const arr_3d_t &GC, 
-        const ix_t &i, 
+        const arr_3d_t &GC,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+1, j+h, k  )) + 
+            GC(pi<dim>(i+1, j+h, k  )) +
             GC(pi<dim>(i+1, j+h, k+1)) +
-            GC(pi<dim>(i  , j+h, k  )) + 
-            GC(pi<dim>(i  , j+h, k+1)) 
+            GC(pi<dim>(i  , j+h, k  )) +
+            GC(pi<dim>(i  , j+h, k+1))
           ) / 4
         );
       }
-      
+
       // interpolation of GC[2] to (i+1/2, j, k)
       template<int dim, class arr_3d_t, class ix_t>
       inline auto GC2_bar_x(
-        const arr_3d_t &GC, 
-        const ix_t &i, 
+        const arr_3d_t &GC,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+1, j, k+h)) + 
+            GC(pi<dim>(i+1, j, k+h)) +
             GC(pi<dim>(i  , j, k+h))
           ) / 2
         );
@@ -153,36 +153,36 @@ namespace libmpdataxx
       // interpolation of GC[2] to (i+1/2, j, k)
       template<int dim, class arr_3d_t, class ix_t>
       inline auto GC2_bar_xz(
-        const arr_3d_t &GC, 
-        const ix_t &i, 
+        const arr_3d_t &GC,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+1, j, k+h)) + 
+            GC(pi<dim>(i+1, j, k+h)) +
             GC(pi<dim>(i  , j, k+h)) +
-            GC(pi<dim>(i+1, j, k-h)) + 
-            GC(pi<dim>(i  , j, k-h)) 
+            GC(pi<dim>(i+1, j, k-h)) +
+            GC(pi<dim>(i  , j, k-h))
           ) / 4
         );
       }
-      
+
       // interpolation of GC[2] to (i+1/2, j+1/2, k+1/2)
       template<int dim, class arr_3d_t, class ix_t>
       inline auto GC2_bar_xy(
-        const arr_3d_t &GC, 
-        const ix_t &i, 
+        const arr_3d_t &GC,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k
       )
       {
         return return_helper<ix_t>(
           (
-            GC(pi<dim>(i+1, j+1, k+h)) + 
+            GC(pi<dim>(i+1, j+1, k+h)) +
             GC(pi<dim>(i  , j+1, k+h)) +
-            GC(pi<dim>(i+1, j  , k+h)) + 
+            GC(pi<dim>(i+1, j  , k+h)) +
             GC(pi<dim>(i  , j  , k+h))
           ) / 4
         );
@@ -192,7 +192,7 @@ namespace libmpdataxx
       // dx * dGC[0]/dx at (i+1/2, j, k)
       template <int dim, class arr_3d_t, class ix_t>
       inline auto ndx_GC0(
-        const arr_3d_t &GC, 
+        const arr_3d_t &GC,
         const ix_t &i,
         const ix_t &j,
         const ix_t &k
@@ -202,13 +202,13 @@ namespace libmpdataxx
           (GC(pi<dim>(i+h+1, j, k)) - GC(pi<dim>(i+h-1, j, k))) / 2
         );
       }
-      
+
       // nondimensionalised xx derivative of GC[0] i.e.
       // dx^2 * dGC[0]/dxx at (i+1/2, j, k) - general case
       template <opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto ndxx_GC0(
-        const arr_3d_t &psi, 
-        const arr_3d_t &GC, 
+        const arr_3d_t &psi,
+        const arr_3d_t &GC,
         const ix_t &i,
         const ix_t &j,
         const ix_t &k,
@@ -219,13 +219,13 @@ namespace libmpdataxx
           GC(pi<dim>(i+h+1, j, k)) + GC(pi<dim>(i+h-1, j, k)) - 2 * GC(pi<dim>(i+h, j, k))
         );
       }
-      
+
       // nondimensionalised xx derivative of GC[0] i.e.
       // dx^2 * dGC[0]/dxx at (i+1/2, j, k) - infinite gauge version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto ndxx_GC0(
-        const arr_3d_t &psi, 
-        const arr_3d_t &GC, 
+        const arr_3d_t &psi,
+        const arr_3d_t &GC,
         const ix_t &i,
         const ix_t &j,
         const ix_t &k,
@@ -233,16 +233,16 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          (GC(pi<dim>(i+h+1, j, k)) + GC(pi<dim>(i+h-1, j, k)) - 2 * GC(pi<dim>(i+h, j, k))) * 
+          (GC(pi<dim>(i+h+1, j, k)) + GC(pi<dim>(i+h-1, j, k)) - 2 * GC(pi<dim>(i+h, j, k))) *
            psi_bar_x<opts, dim>(psi, i, j, k)
         );
       }
-      
+
       // nondimensionalised tt derivative of GC[0] i.e.
       // dt^2 * dGC[0]/dtt at (i+1/2, j, k) - general case
       template <opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto ndtt_GC0(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arr_3d_t &ndtt_GC,
         const ix_t &i,
         const ix_t &j,
@@ -254,12 +254,12 @@ namespace libmpdataxx
           ndtt_GC(pi<dim>(i+h, j, k)) + 0
         );
       }
-      
+
       // nondimensionalised tt derivative of GC[0] i.e.
       // dt^2 * dGC[0]/dtt at (i+1/2, j, k) - infinite gauge version
       template <opts_t opts, int dim, class arr_3d_t, class ix_t>
       inline auto ndtt_GC0(
-        const arr_3d_t &psi, 
+        const arr_3d_t &psi,
         const arr_3d_t &ndtt_GC,
         const ix_t &i,
         const ix_t &j,
@@ -273,4 +273,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespace libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp
index 94b70891..7538896a 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp
@@ -11,10 +11,10 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_1d.hpp>
 
 namespace libmpdataxx
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       template<opts_t opts, class arr_1d_t, class ix_t>
       forceinline_macro auto ndxx_psi_coeff(
@@ -39,23 +39,23 @@ namespace libmpdataxx
         const arr_1d_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
-        typename std::enable_if<opts::isset(opts, opts::tot)>::type* = 0 
+        typename std::enable_if<opts::isset(opts, opts::tot)>::type* = 0
       )
       {
         return return_helper<ix_t>(
             ndxx_psi<opts>(psi, i) * ndxx_psi_coeff<opts>(GC, G, i)
         );
       }
-      
+
       template<opts_t opts, class arr_1d_t, class ix_t>
       forceinline_macro auto TOT(
         const arr_1d_t &psi,
         const arr_1d_t &GC,
         const arr_1d_t &G,
         const ix_t &i,
-        typename std::enable_if<!opts::isset(opts, opts::tot)>::type* = 0 
+        typename std::enable_if<!opts::isset(opts, opts::tot)>::type* = 0
       )
-      { 
+      {
         return 0;
       }
     } // namespace mpdata
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_hot_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_hot_2d.hpp
index 16d40160..4266ac9a 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_hot_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_hot_2d.hpp
@@ -12,11 +12,11 @@
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_g_2d.hpp>
 #include <boost/preprocessor/punctuation/comma.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto ndxx_psi_coeff(
@@ -44,11 +44,11 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          (abs(GC[dim](pi<dim>(i+h, j))) - 2 * pow2(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j)) 
+          (abs(GC[dim](pi<dim>(i+h, j))) - 2 * pow2(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j))
            * GC1_bar_xy<dim>(GC[dim+1], i, j) / (2 * G_bar_x<opts, dim>(G, i, j))
         );
       }
-      
+
       // third order terms
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto TOT(
@@ -58,11 +58,11 @@ namespace libmpdataxx
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::tot)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
-           ndxx_psi<opts, dim>(psi, i, j) * ndxx_psi_coeff<opts, dim>(GC[dim], G, i, j) 
-           + 
+           ndxx_psi<opts, dim>(psi, i, j) * ndxx_psi_coeff<opts, dim>(GC[dim], G, i, j)
+           +
            ndxy_psi<opts, dim>(psi, i, j) * ndxy_psi_coeff<opts, dim>(GC, G, i, j)
         );
       }
@@ -96,7 +96,7 @@ namespace libmpdataxx
           ) / 2
         );
       }
-      
+
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto ndxxy_psi_coeff(
         const arrvec_t<arr_2d_t> &GC,
@@ -106,13 +106,13 @@ namespace libmpdataxx
       )
       {
         return return_helper<ix_t>(
-          3 * 
-          (abs(GC[dim](pi<dim>(i+h, j))) - pow2(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j)) 
+          3 *
+          (abs(GC[dim](pi<dim>(i+h, j))) - pow2(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j))
            * GC[dim](pi<dim>(i+h, j)) * GC1_bar_xy<dim>(GC[dim-1], i, j)
            / pow2(G_bar_x<opts, dim>(G, i, j))
         );
       }
-      
+
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto ndxyy_psi_coeff(
         const arrvec_t<arr_2d_t> &GC,
@@ -132,7 +132,7 @@ namespace libmpdataxx
           ) / G_bar_x<opts, dim>(G, i, j) / 4
         );
       }
-      
+
       // fourth order terms
       template<opts_t opts, int dim, class arr_2d_t, class ix_t>
       forceinline_macro auto FOT(
@@ -148,10 +148,10 @@ namespace libmpdataxx
                       "adding fourth-order terms makes sense only when third-order terms are present (tot or div_3rd option)");
         static_assert(opts::isset(opts, opts::iga), "fot option only available with iga");
         return return_helper<ix_t>(
-         ndxxx_psi<opts, dim>(psi, i, j) * ndxxx_psi_coeff<opts, dim>(GC[dim], G, i, j) 
-         + 
+         ndxxx_psi<opts, dim>(psi, i, j) * ndxxx_psi_coeff<opts, dim>(GC[dim], G, i, j)
+         +
          ndxxy_psi<opts, dim>(psi, i, j) * ndxxy_psi_coeff<opts, dim>(GC, G, i, j)
-         + 
+         +
          ndxyy_psi<opts, dim>(psi, i, j) * ndxyy_psi_coeff<opts, dim>(GC, G, i, j)
         );
       }
@@ -170,4 +170,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespcae libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_hot_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_hot_3d.hpp
index fc2afd3f..79ad44ed 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_hot_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_hot_3d.hpp
@@ -99,11 +99,11 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           ndxx_psi<opts, dim>(psi, i, j, k) * ndxx_psi_coeff<opts, dim>(GC[dim], G, i, j, k)
-          + 
+          +
           ndxy_psi<opts, dim>(psi, i, j, k) * ndxy_psi_coeff<opts, dim>(GC, G, i, j, k)
-          + 
+          +
           ndxz_psi<opts, dim>(psi, i, j, k) * ndxz_psi_coeff<opts, dim>(GC, G, i, j, k)
-          + 
+          +
           ndyz_psi<opts, dim>(psi, i, j, k) * ndyz_psi_coeff<opts, dim>(GC, G, i, j, k)
         );
       }
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_psi_1d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_psi_1d.hpp
index 9b7c24ec..98f1eb01 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_psi_1d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_psi_1d.hpp
@@ -10,15 +10,15 @@
 
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of psi to (i+1/2) - positive sign scalar / infinite gauge version
       template<opts_t opts, class arr_1d_t, class ix_t>
-      inline auto psi_bar_x( 
+      inline auto psi_bar_x(
         const arr_1d_t &psi,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::abs)>::type* = 0
@@ -30,10 +30,10 @@ namespace libmpdataxx
           ) / 2
         );
       }
-     
+
       // interpolation of psi to (i+1/2) - variable sign scalar version
       template<opts_t opts, class arr_1d_t, class ix_t>
-      inline auto psi_bar_x( 
+      inline auto psi_bar_x(
         const arr_1d_t &psi,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::abs)>::type* = 0
@@ -47,11 +47,11 @@ namespace libmpdataxx
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2) - positive sign scalar version  
+      // dx/psi * dpsi/dx at (i+1/2) - positive sign scalar version
       template<opts_t opts, class arr_1d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_1d_t &psi, 
-        const ix_t &i, 
+        const arr_1d_t &psi,
+        const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
       )
       {
@@ -66,11 +66,11 @@ namespace libmpdataxx
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2) - variable-sign scalar version 
+      // dx/psi * dpsi/dx at (i+1/2) - variable-sign scalar version
       template<opts_t opts, class arr_1d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_1d_t &psi, 
-        const ix_t &i, 
+        const arr_1d_t &psi,
+        const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
       {
@@ -80,16 +80,16 @@ namespace libmpdataxx
             abs(psi(i+1)) - abs(psi(i))
             ,// -----------------------
             abs(psi(i+1)) + abs(psi(i))
-          ) 
+          )
         );
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2) - infinite gauge version 
+      // dx/psi * dpsi/dx at (i+1/2) - infinite gauge version
       template<opts_t opts, class arr_1d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_1d_t &psi, 
-        const ix_t &i, 
+        const arr_1d_t &psi,
+        const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
       {
@@ -103,7 +103,7 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised xx derivative of psi i.e.
       // dx^2/psi * dpsi/dxx at (i+1/2) - positive sign scalar version
       template<opts_t opts, class arr_1d_t, class ix_t>
@@ -130,7 +130,7 @@ namespace libmpdataxx
         const arr_1d_t &psi,
         const ix_t &i,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
             2 *
@@ -149,7 +149,7 @@ namespace libmpdataxx
         const arr_1d_t &psi,
         const ix_t &i,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
-      ) 
+      )
       {
         static_assert(!opts::isset(opts, opts::abs), "iga & abs are mutually exclusive");
         return return_helper<ix_t>(
@@ -161,4 +161,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespcae libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_psi_2d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_psi_2d.hpp
index cab81531..d8a5255d 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_psi_2d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_psi_2d.hpp
@@ -13,20 +13,20 @@
 
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of psi to (i+1/2, j) - positive sign scalar / infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
-      inline auto psi_bar_x( 
+      inline auto psi_bar_x(
         const arr_2d_t &psi,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::abs)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
           (
@@ -34,15 +34,15 @@ namespace libmpdataxx
           ) / 2
         );
       }
-     
+
       // interpolation of psi to (i+1/2, j) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
-      inline auto psi_bar_x( 
+      inline auto psi_bar_x(
         const arr_2d_t &psi,
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::abs)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
           (
@@ -50,10 +50,10 @@ namespace libmpdataxx
           ) / 2
         );
       }
-      
+
       // interpolation of psi to (i, j+1/2) - positive sign scalar / infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
-      inline auto psi_bar_y( 
+      inline auto psi_bar_y(
         const arr_2d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -69,7 +69,7 @@ namespace libmpdataxx
 
       // interpolation of psi to (i, j+1/2) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
-      inline auto psi_bar_y( 
+      inline auto psi_bar_y(
         const arr_2d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -82,10 +82,10 @@ namespace libmpdataxx
           ) / 2
         );
       }
-      
+
       // interpolation of psi to (i+1/2, j+1/2) - positive sign scalar / infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
-      inline auto psi_bar_xy( 
+      inline auto psi_bar_xy(
         const arr_2d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -94,7 +94,7 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            psi(pi<dim>(i+1, j  )) + 
+            psi(pi<dim>(i+1, j  )) +
             psi(pi<dim>(i  , j  )) +
             psi(pi<dim>(i  , j+1)) +
             psi(pi<dim>(i+1, j+1))
@@ -104,7 +104,7 @@ namespace libmpdataxx
 
       // interpolation of psi to (i+1/2, j+1/2) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
-      inline auto psi_bar_xy( 
+      inline auto psi_bar_xy(
         const arr_2d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -113,7 +113,7 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            abs(psi(pi<dim>(i+1, j  ))) + 
+            abs(psi(pi<dim>(i+1, j  ))) +
             abs(psi(pi<dim>(i  , j  ))) +
             abs(psi(pi<dim>(i  , j+1))) +
             abs(psi(pi<dim>(i+1, j+1)))
@@ -122,14 +122,14 @@ namespace libmpdataxx
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2, j) - positive sign scalar version  
+      // dx/psi * dpsi/dx at (i+1/2, j) - positive sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_2d_t &psi, 
-        const ix_t &i, 
+        const arr_2d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
           2 *
@@ -142,11 +142,11 @@ namespace libmpdataxx
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2, j) - variable-sign scalar version 
+      // dx/psi * dpsi/dx at (i+1/2, j) - variable-sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_2d_t &psi, 
-        const ix_t &i, 
+        const arr_2d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
@@ -157,16 +157,16 @@ namespace libmpdataxx
             abs(psi(pi<dim>(i+1, j))) - abs(psi(pi<dim>(i, j)))
             ,// -------------------------------------------
             abs(psi(pi<dim>(i+1, j))) + abs(psi(pi<dim>(i, j)))
-          ) 
+          )
         );
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2, j) - infinite gauge version 
+      // dx/psi * dpsi/dx at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_2d_t &psi, 
-        const ix_t &i, 
+        const arr_2d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
@@ -181,19 +181,19 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised y derivative of psi i.e.
       // dy/psi * dpsi/dy at (i+1/2, j) - positive sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndy_psi(
-        const arr_2d_t &psi, 
-        const ix_t &i, 
+        const arr_2d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          frac<opts, ix_t>( 
+          frac<opts, ix_t>(
             psi(pi<dim>(i+1, j+1)) + psi(pi<dim>(i, j+1)) - psi(pi<dim>(i+1, j-1)) - psi(pi<dim>(i, j-1))
             ,// ---------------------------------------------------------------------------------
             psi(pi<dim>(i+1, j+1)) + psi(pi<dim>(i, j+1)) + psi(pi<dim>(i+1, j-1)) + psi(pi<dim>(i, j-1))
@@ -205,14 +205,14 @@ namespace libmpdataxx
       // dy/psi * dpsi/dy at (i+1/2, j) - variable sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndy_psi(
-        const arr_2d_t &psi, 
-        const ix_t &i, 
+        const arr_2d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          frac<opts, ix_t>( 
+          frac<opts, ix_t>(
             abs(psi(pi<dim>(i+1, j+1))) + abs(psi(pi<dim>(i, j+1))) - abs(psi(pi<dim>(i+1, j-1))) - abs(psi(pi<dim>(i, j-1)))
             ,// -----------------------------------------------------------------------------------------------------
             abs(psi(pi<dim>(i+1, j+1))) + abs(psi(pi<dim>(i, j+1))) + abs(psi(pi<dim>(i+1, j-1))) + abs(psi(pi<dim>(i, j-1)))
@@ -224,8 +224,8 @@ namespace libmpdataxx
       // dy/psi * dpsi/dy at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
       inline auto ndy_psi(
-        const arr_2d_t &psi, 
-        const ix_t &i, 
+        const arr_2d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
@@ -239,7 +239,7 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised xx derivative of psi i.e.
       // dx^2/psi * dpsi/dxx at (i+1/2, j) - positive sign scalar version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
@@ -312,7 +312,7 @@ namespace libmpdataxx
         return return_helper<ix_t>(
             2 *
             frac<opts, ix_t>(
-              psi(pi<dim>(i+1, j+1)) - psi(pi<dim>(i, j+1)) - psi(pi<dim>(i+1, j-1)) + psi(pi<dim>(i, j-1))            
+              psi(pi<dim>(i+1, j+1)) - psi(pi<dim>(i, j+1)) - psi(pi<dim>(i+1, j-1)) + psi(pi<dim>(i, j-1))
               ,//-----------------------------------------------------------------------------------------
               psi(pi<dim>(i+1, j+1)) + psi(pi<dim>(i, j+1)) + psi(pi<dim>(i+1, j-1)) + psi(pi<dim>(i, j-1))
           )
@@ -332,7 +332,7 @@ namespace libmpdataxx
         return return_helper<ix_t>(
             2 *
             frac<opts, ix_t>(
-              abs(psi(pi<dim>(i+1, j+1))) - abs(psi(pi<dim>(i, j+1))) - abs(psi(pi<dim>(i+1, j-1))) + abs(psi(pi<dim>(i, j-1)))            
+              abs(psi(pi<dim>(i+1, j+1))) - abs(psi(pi<dim>(i, j+1))) - abs(psi(pi<dim>(i+1, j-1))) + abs(psi(pi<dim>(i, j-1)))
               ,//-------------------------------------------------------------------------------------------------------------
               abs(psi(pi<dim>(i+1, j+1))) + abs(psi(pi<dim>(i, j+1))) + abs(psi(pi<dim>(i+1, j-1))) + abs(psi(pi<dim>(i, j-1)))
           )
@@ -352,7 +352,7 @@ namespace libmpdataxx
         static_assert(!opts::isset(opts, opts::abs), "iga & abs options are mutually exclusive");
         return return_helper<ix_t>(
           2 *
-          (psi(pi<dim>(i+1, j+1)) - psi(pi<dim>(i, j+1)) - psi(pi<dim>(i+1, j-1)) + psi(pi<dim>(i, j-1)))            
+          (psi(pi<dim>(i+1, j+1)) - psi(pi<dim>(i, j+1)) - psi(pi<dim>(i+1, j-1)) + psi(pi<dim>(i, j-1)))
           / //-------------------------------------------------------------------------------------------
           (1 + 1 + 1 + 1)
         );
@@ -374,7 +374,7 @@ namespace libmpdataxx
           (1 + 1 + 1 + 1)
         );
       }
-      
+
       // nondimensionalised xxy derivative of psi i.e.
       // dx^2*dy/psi * dpsi/dxxy at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
@@ -383,16 +383,16 @@ namespace libmpdataxx
         const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
-      ) 
+      )
       {
         return return_helper<ix_t>(
-          (  psi(pi<dim>(i+2, j+1)) - psi(pi<dim>(i+1, j+1)) - psi(pi<dim>(i, j+1)) + psi(pi<dim>(i-1, j+1)) 
+          (  psi(pi<dim>(i+2, j+1)) - psi(pi<dim>(i+1, j+1)) - psi(pi<dim>(i, j+1)) + psi(pi<dim>(i-1, j+1))
             -psi(pi<dim>(i+2, j-1)) + psi(pi<dim>(i+1, j-1)) + psi(pi<dim>(i, j-1)) - psi(pi<dim>(i-1, j-1)) )
           / //------------------------------------------------------------------------------------------------
           (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1)
         );
       }
-      
+
       // nondimensionalised xyy derivative of psi i.e.
       // dx*dy^2/psi * dpsi/dxyy at (i+1/2, j) - infinite gauge version
       template <opts_t opts, int dim, class arr_2d_t, class ix_t>
@@ -412,12 +412,12 @@ namespace libmpdataxx
       }
 
       // nondimensionalised tx derivative of psi i.e.
-      // dx*dt/psi * dpsi/dtx at (i+1/2, j) - positive sign scalar version  
+      // dx*dt/psi * dpsi/dtx at (i+1/2, j) - positive sign scalar version
       template<opts_t opts, int d, class arr_2d_t, class ix_t>
       inline auto ndtx_psi(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
-        const ix_t &i, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
       )
@@ -437,14 +437,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised tx derivative of psi i.e.
-      // dx*dt/psi * dpsi/dtx at (i+1/2, j) - variable sign scalar version  
+      // dx*dt/psi * dpsi/dtx at (i+1/2, j) - variable sign scalar version
       template<opts_t opts, int d, class arr_2d_t, class ix_t>
       inline auto ndtx_psi(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
-        const ix_t &i, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
@@ -464,14 +464,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised tx derivative of psi i.e.
-      // dx*dt/psi * dpsi/dtx at (i+1/2, j) - infinite gauge version  
+      // dx*dt/psi * dpsi/dtx at (i+1/2, j) - infinite gauge version
       template<opts_t opts, int d, class arr_2d_t, class ix_t>
       inline auto ndtx_psi(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
-        const ix_t &i, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
@@ -485,12 +485,12 @@ namespace libmpdataxx
       }
 
       // nondimensionalised t derivative of psi i.e.
-      // dt/psi * dpsi/dt at (i+1/2, j) - positive sign scalar version  
+      // dt/psi * dpsi/dt at (i+1/2, j) - positive sign scalar version
       template<opts_t opts, int d, class arr_2d_t, class ix_t>
       inline auto ndt_psi(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
-        const ix_t &i, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
       )
@@ -510,14 +510,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised t derivative of psi i.e.
-      // dt/psi * dpsi/dt at (i+1/2, j) - variable sign scalar version  
+      // dt/psi * dpsi/dt at (i+1/2, j) - variable sign scalar version
       template<opts_t opts, int d, class arr_2d_t, class ix_t>
       inline auto ndt_psi(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
-        const ix_t &i, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
@@ -537,14 +537,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised t derivative of psi i.e.
-      // dt/psi * dpsi/dt at (i+1/2, j) - infinite gauge version  
+      // dt/psi * dpsi/dt at (i+1/2, j) - infinite gauge version
       template<opts_t opts, int d, class arr_2d_t, class ix_t>
       inline auto ndt_psi(
-        const arr_2d_t &psi_np1, 
-        const arr_2d_t &psi_n, 
-        const ix_t &i, 
+        const arr_2d_t &psi_np1,
+        const arr_2d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
       )
@@ -560,4 +560,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespcae libmpdataxx 
+} // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_psi_3d.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_psi_3d.hpp
index 2f0c30c6..329c7210 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_psi_3d.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_psi_3d.hpp
@@ -14,15 +14,15 @@
 
 #include <libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp>
 
-namespace libmpdataxx 
-{ 
-  namespace formulae 
-  { 
-    namespace mpdata 
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace mpdata
     {
       // interpolation of psi to (i+1/2, j, k) - positive sign scalar / infinite gauge version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_x( 
+      inline auto psi_bar_x(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -32,15 +32,15 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            psi(pi<dim>(i+1, j, k)) + 
+            psi(pi<dim>(i+1, j, k)) +
             psi(pi<dim>(i  , j, k))
           ) / 2
         );
       }
-      
+
       // interpolation of psi to (i+1/2, j, k) - variable sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_x( 
+      inline auto psi_bar_x(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -50,15 +50,15 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            abs(psi(pi<dim>(i+1, j, k))) + 
+            abs(psi(pi<dim>(i+1, j, k))) +
             abs(psi(pi<dim>(i  , j, k)))
           ) / 2
         );
       }
-      
+
       // interpolation of psi to (i, j+1/2, k) - positive sign scalar / infinite gauge version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_y( 
+      inline auto psi_bar_y(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -68,7 +68,7 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            psi(pi<dim>(i, j  , k)) + 
+            psi(pi<dim>(i, j  , k)) +
             psi(pi<dim>(i, j+1, k))
           ) / 2
         );
@@ -76,7 +76,7 @@ namespace libmpdataxx
 
       // interpolation of psi to (i, j+1/2, k) - variable sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_y( 
+      inline auto psi_bar_y(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -86,15 +86,15 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            abs(psi(pi<dim>(i, j  , k))) + 
+            abs(psi(pi<dim>(i, j  , k))) +
             abs(psi(pi<dim>(i, j+1, k)))
           ) / 2
         );
       }
-      
+
       // interpolation of psi to (i, j, k+1/2) - positive sign scalar / infinite gauge version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_z( 
+      inline auto psi_bar_z(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -104,7 +104,7 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            psi(pi<dim>(i, j, k  )) + 
+            psi(pi<dim>(i, j, k  )) +
             psi(pi<dim>(i, j, k+1))
           ) / 2
         );
@@ -112,7 +112,7 @@ namespace libmpdataxx
 
       // interpolation of psi to (i, j, k+1/2) - variable sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_z( 
+      inline auto psi_bar_z(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -122,15 +122,15 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            abs(psi(pi<dim>(i, j, k  ))) + 
+            abs(psi(pi<dim>(i, j, k  ))) +
             abs(psi(pi<dim>(i, j, k+1)))
           ) / 2
         );
       }
-      
+
       // interpolation of psi to (i+1/2, j+1/2, k) - positive sign scalar / infinite gauge version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_xy( 
+      inline auto psi_bar_xy(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -140,7 +140,7 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            psi(pi<dim>(i+1, j  , k)) + 
+            psi(pi<dim>(i+1, j  , k)) +
             psi(pi<dim>(i  , j  , k)) +
             psi(pi<dim>(i  , j+1, k)) +
             psi(pi<dim>(i+1, j+1, k))
@@ -150,7 +150,7 @@ namespace libmpdataxx
 
       // interpolation of psi to (i+1/2, j+1/2, k) - variable sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_xy( 
+      inline auto psi_bar_xy(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -160,17 +160,17 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            abs(psi(pi<dim>(i+1, j  , k))) + 
+            abs(psi(pi<dim>(i+1, j  , k))) +
             abs(psi(pi<dim>(i  , j  , k))) +
             abs(psi(pi<dim>(i  , j+1, k))) +
             abs(psi(pi<dim>(i+1, j+1, k)))
           ) / 4
         );
       }
-      
+
       // interpolation of psi to (i+1/2, j, k+1/2) - positive sign scalar / infinite gauge version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_xz( 
+      inline auto psi_bar_xz(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -180,7 +180,7 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            psi(pi<dim>(i+1, j  , k)) + 
+            psi(pi<dim>(i+1, j  , k)) +
             psi(pi<dim>(i  , j  , k)) +
             psi(pi<dim>(i  , j, k+1)) +
             psi(pi<dim>(i+1, j, k+1))
@@ -190,7 +190,7 @@ namespace libmpdataxx
 
       // interpolation of psi to (i+1/2, j, k+1/2) - variable sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_xz( 
+      inline auto psi_bar_xz(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -200,17 +200,17 @@ namespace libmpdataxx
       {
         return return_helper<ix_t>(
           (
-            abs(psi(pi<dim>(i+1, j  , k))) + 
+            abs(psi(pi<dim>(i+1, j  , k))) +
             abs(psi(pi<dim>(i  , j  , k))) +
             abs(psi(pi<dim>(i  , j, k+1))) +
             abs(psi(pi<dim>(i+1, j, k+1)))
           ) / 4
         );
       }
-      
+
       // interpolation of psi to (i+1/2, j+1/2, k+1/2) - positive sign scalar / infinite gauge version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_xyz( 
+      inline auto psi_bar_xyz(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -221,20 +221,20 @@ namespace libmpdataxx
         return return_helper<ix_t>(
           (
             psi(pi<dim>(i  , j  , k  )) +
-            psi(pi<dim>(i+1, j  , k  )) + 
+            psi(pi<dim>(i+1, j  , k  )) +
             psi(pi<dim>(i  , j+1, k  )) +
             psi(pi<dim>(i  , j  , k+1)) +
-            psi(pi<dim>(i+1, j+1, k  )) + 
-            psi(pi<dim>(i+1, j  , k+1)) + 
-            psi(pi<dim>(i  , j+1, k+1)) + 
+            psi(pi<dim>(i+1, j+1, k  )) +
+            psi(pi<dim>(i+1, j  , k+1)) +
+            psi(pi<dim>(i  , j+1, k+1)) +
             psi(pi<dim>(i+1, j+1, k+1))
           ) / 8
         );
       }
-      
+
       // interpolation of psi to (i+1/2, j+1/2, k+1/2) - variable sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
-      inline auto psi_bar_xyz( 
+      inline auto psi_bar_xyz(
         const arr_3d_t &psi,
         const ix_t &i,
         const ix_t &j,
@@ -245,23 +245,23 @@ namespace libmpdataxx
         return return_helper<ix_t>(
           (
             abs(psi(pi<dim>(i  , j  , k  ))) +
-            abs(psi(pi<dim>(i+1, j  , k  ))) + 
+            abs(psi(pi<dim>(i+1, j  , k  ))) +
             abs(psi(pi<dim>(i  , j+1, k  ))) +
             abs(psi(pi<dim>(i  , j  , k+1))) +
-            abs(psi(pi<dim>(i+1, j+1, k  ))) + 
-            abs(psi(pi<dim>(i+1, j  , k+1))) + 
-            abs(psi(pi<dim>(i  , j+1, k+1))) + 
+            abs(psi(pi<dim>(i+1, j+1, k  ))) +
+            abs(psi(pi<dim>(i+1, j  , k+1))) +
+            abs(psi(pi<dim>(i  , j+1, k+1))) +
             abs(psi(pi<dim>(i+1, j+1, k+1)))
           ) / 8
         );
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2, j, k) - positive sign scalar version  
+      // dx/psi * dpsi/dx at (i+1/2, j, k) - positive sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
@@ -278,11 +278,11 @@ namespace libmpdataxx
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2, j, k) - variable-sign scalar version 
+      // dx/psi * dpsi/dx at (i+1/2, j, k) - variable-sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndx_psi(
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
@@ -294,16 +294,16 @@ namespace libmpdataxx
             abs(psi(pi<d>(i+1, j, k))) - abs(psi(pi<d>(i, j, k)))
             ,
             abs(psi(pi<d>(i+1, j, k))) + abs(psi(pi<d>(i, j, k)))
-          ) 
+          )
         );
       }
 
       // nondimensionalised x derivative of psi i.e.
-      // dx/psi * dpsi/dx at (i+1/2, j, k) - infinite gauge version 
+      // dx/psi * dpsi/dx at (i+1/2, j, k) - infinite gauge version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndx_psi(  // inf. gauge option
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -319,15 +319,15 @@ namespace libmpdataxx
       // dy/psi * dpsi/dy at (i+1/2, j, k) - positive sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndy_psi(
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          frac<opts, ix_t>( 
+          frac<opts, ix_t>(
               psi(pi<d>(i+1, j+1, k))
             + psi(pi<d>(i  , j+1, k))
             - psi(pi<d>(i+1, j-1, k))
@@ -345,15 +345,15 @@ namespace libmpdataxx
       // dy/psi * dpsi/dy at (i+1/2, j, k) - variable sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndy_psi(
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          frac<opts, ix_t>( 
+          frac<opts, ix_t>(
               abs(psi(pi<d>(i+1, j+1, k)))
             + abs(psi(pi<d>(i  , j+1, k)))
             - abs(psi(pi<d>(i+1, j-1, k)))
@@ -371,8 +371,8 @@ namespace libmpdataxx
       // dy/psi * dpsi/dy at (i+1/2, j, k) - infinite gauge version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndy_psi(
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -388,20 +388,20 @@ namespace libmpdataxx
           ) / 4
         );
       }
-      
+
       // nondimensionalised z derivative of psi i.e.
       // dz/psi * dpsi/dz at (i+1/2, j, k) - positive sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndz_psi( // positive sign signal
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          frac<opts, ix_t>( 
+          frac<opts, ix_t>(
               psi(pi<d>(i+1, j, k+1))
             + psi(pi<d>(i  , j, k+1))
             - psi(pi<d>(i+1, j, k-1))
@@ -419,15 +419,15 @@ namespace libmpdataxx
       // dz/psi * dpsi/dz at (i+1/2, j, k) - variable sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndz_psi(
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
       )
       {
         return return_helper<ix_t>(
-          frac<opts, ix_t>( 
+          frac<opts, ix_t>(
               abs(psi(pi<d>(i+1, j, k+1)))
             + abs(psi(pi<d>(i  , j, k+1)))
             - abs(psi(pi<d>(i+1, j, k-1)))
@@ -445,8 +445,8 @@ namespace libmpdataxx
       // dz/psi * dpsi/dz at (i+1/2, j, k) - infinite gauge version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndz_psi(
-        const arr_3d_t &psi, 
-        const ix_t &i, 
+        const arr_3d_t &psi,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -556,7 +556,7 @@ namespace libmpdataxx
                 psi(pi<dim>(i+1, j+1, k))
               - psi(pi<dim>(i  , j+1, k))
               - psi(pi<dim>(i+1, j-1, k))
-              + psi(pi<dim>(i  , j-1, k))            
+              + psi(pi<dim>(i  , j-1, k))
               ,
                 psi(pi<dim>(i+1, j+1, k))
               + psi(pi<dim>(i  , j+1, k))
@@ -583,7 +583,7 @@ namespace libmpdataxx
                 abs(psi(pi<dim>(i+1, j+1, k)))
               - abs(psi(pi<dim>(i  , j+1, k)))
               - abs(psi(pi<dim>(i+1, j-1, k)))
-              + abs(psi(pi<dim>(i  , j-1, k)))            
+              + abs(psi(pi<dim>(i  , j-1, k)))
               ,
                 abs(psi(pi<dim>(i+1, j+1, k)))
               + abs(psi(pi<dim>(i  , j+1, k)))
@@ -610,11 +610,11 @@ namespace libmpdataxx
                 psi(pi<dim>(i+1, j+1, k))
               - psi(pi<dim>(i  , j+1, k))
               - psi(pi<dim>(i+1, j-1, k))
-              + psi(pi<dim>(i  , j-1, k))            
+              + psi(pi<dim>(i  , j-1, k))
           ) / 4
         );
       }
-      
+
       // nondimensionalised xz derivative of psi i.e.
       // dx*dz/psi * dpsi/dxdz at (i+1/2, j, k) - positive sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
@@ -632,7 +632,7 @@ namespace libmpdataxx
                 psi(pi<dim>(i+1, j, k+1))
               - psi(pi<dim>(i  , j, k+1))
               - psi(pi<dim>(i+1, j, k-1))
-              + psi(pi<dim>(i  , j, k-1))            
+              + psi(pi<dim>(i  , j, k-1))
               ,
                 psi(pi<dim>(i+1, j, k+1))
               + psi(pi<dim>(i  , j, k+1))
@@ -641,7 +641,7 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised xz derivative of psi i.e.
       // dx*dz/psi * dpsi/dxdz at (i+1/2, j, k) - variable sign scalar version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
@@ -659,7 +659,7 @@ namespace libmpdataxx
                 abs(psi(pi<dim>(i+1, j, k+1)))
               - abs(psi(pi<dim>(i  , j, k+1)))
               - abs(psi(pi<dim>(i+1, j, k-1)))
-              + abs(psi(pi<dim>(i  , j, k-1)))            
+              + abs(psi(pi<dim>(i  , j, k-1)))
               ,
                 abs(psi(pi<dim>(i+1, j, k+1)))
               + abs(psi(pi<dim>(i  , j, k+1)))
@@ -668,7 +668,7 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised xz derivative of psi i.e.
       // dx*dz/psi * dpsi/dxdz at (i+1/2, j, k) - infinite gauge version
       template<opts_t opts, int dim, class arr_3d_t, class ix_t>
@@ -686,7 +686,7 @@ namespace libmpdataxx
                 psi(pi<dim>(i+1, j, k+1))
               - psi(pi<dim>(i  , j, k+1))
               - psi(pi<dim>(i+1, j, k-1))
-              + psi(pi<dim>(i  , j, k-1))            
+              + psi(pi<dim>(i  , j, k-1))
           ) / 4
         );
       }
@@ -707,19 +707,19 @@ namespace libmpdataxx
               psi(pi<dim>(i+1, j+1, k+1))
             + psi(pi<dim>(i+1, j-1, k-1))
             - psi(pi<dim>(i+1, j+1, k-1))
-            - psi(pi<dim>(i+1, j-1, k+1))           
+            - psi(pi<dim>(i+1, j-1, k+1))
             + psi(pi<dim>(i  , j+1, k+1))
             + psi(pi<dim>(i  , j-1, k-1))
-            - psi(pi<dim>(i  , j+1, k-1))           
+            - psi(pi<dim>(i  , j+1, k-1))
             - psi(pi<dim>(i  , j-1, k+1))
             , //-------------------------
               psi(pi<dim>(i+1, j+1, k+1))
             + psi(pi<dim>(i+1, j-1, k-1))
             + psi(pi<dim>(i+1, j+1, k-1))
-            + psi(pi<dim>(i+1, j-1, k+1))           
+            + psi(pi<dim>(i+1, j-1, k+1))
             + psi(pi<dim>(i  , j+1, k+1))
             + psi(pi<dim>(i  , j-1, k-1))
-            + psi(pi<dim>(i  , j+1, k-1))           
+            + psi(pi<dim>(i  , j+1, k-1))
             + psi(pi<dim>(i  , j-1, k+1))
             )
         );
@@ -741,19 +741,19 @@ namespace libmpdataxx
               abs(psi(pi<dim>(i+1, j+1, k+1)))
             + abs(psi(pi<dim>(i+1, j-1, k-1)))
             - abs(psi(pi<dim>(i+1, j+1, k-1)))
-            - abs(psi(pi<dim>(i+1, j-1, k+1)))           
+            - abs(psi(pi<dim>(i+1, j-1, k+1)))
             + abs(psi(pi<dim>(i  , j+1, k+1)))
             + abs(psi(pi<dim>(i  , j-1, k-1)))
-            - abs(psi(pi<dim>(i  , j+1, k-1)))          
+            - abs(psi(pi<dim>(i  , j+1, k-1)))
             - abs(psi(pi<dim>(i  , j-1, k+1)))
             , //------------------------------
               abs(psi(pi<dim>(i+1, j+1, k+1)))
             + abs(psi(pi<dim>(i+1, j-1, k-1)))
             + abs(psi(pi<dim>(i+1, j+1, k-1)))
-            + abs(psi(pi<dim>(i+1, j-1, k+1)))           
+            + abs(psi(pi<dim>(i+1, j-1, k+1)))
             + abs(psi(pi<dim>(i  , j+1, k+1)))
             + abs(psi(pi<dim>(i  , j-1, k-1)))
-            + abs(psi(pi<dim>(i  , j+1, k-1)))          
+            + abs(psi(pi<dim>(i  , j+1, k-1)))
             + abs(psi(pi<dim>(i  , j-1, k+1)))
             )
         );
@@ -768,7 +768,7 @@ namespace libmpdataxx
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
-      ) 
+      )
       {
         static_assert(!opts::isset(opts, opts::abs), "iga & abs options are mutually exclusive");
         return return_helper<ix_t>(
@@ -778,7 +778,7 @@ namespace libmpdataxx
             - psi(pi<dim>(i+1, j+1, k-1))
             - psi(pi<dim>(i+1, j-1, k+1))
             + psi(pi<dim>(i  , j+1, k+1))
-            + psi(pi<dim>(i  , j-1, k-1))           
+            + psi(pi<dim>(i  , j-1, k-1))
             - psi(pi<dim>(i  , j+1, k-1))
             - psi(pi<dim>(i  , j-1, k+1))
             )
@@ -788,14 +788,14 @@ namespace libmpdataxx
             )
         );
       }
-      
+
       // nondimensionalised tx derivative of psi i.e.
-      // dx*dt/psi * dpsi/dtx at (i+1/2, j, k) - positive sign scalar version  
+      // dx*dt/psi * dpsi/dtx at (i+1/2, j, k) - positive sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndtx_psi(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
-        const ix_t &i, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
@@ -816,14 +816,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised tx derivative of psi i.e.
-      // dx*dt/psi * dpsi/dtx at (i+1/2, j, k) - variable sign scalar version  
+      // dx*dt/psi * dpsi/dtx at (i+1/2, j, k) - variable sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndtx_psi(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
-        const ix_t &i, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
@@ -844,14 +844,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised tx derivative of psi i.e.
       // dx*dt/psi * dpsi/dtx at (i+1/2, j, k) - infinite gauge version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndtx_psi(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
-        const ix_t &i, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -862,14 +862,14 @@ namespace libmpdataxx
           psi_np1(pi<d>(i+1, j, k)) - psi_n(pi<d>(i+1, j, k)) - psi_np1(pi<d>(i, j, k)) + psi_n(pi<d>(i, j, k))
         );
       }
-      
+
       // nondimensionalised t derivative of psi i.e.
-      // dt/psi * dpsi/dt at (i+1/2, j, k) - positive sign scalar version  
+      // dt/psi * dpsi/dt at (i+1/2, j, k) - positive sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndt_psi(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
-        const ix_t &i, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && !opts::isset(opts, opts::abs)>::type* = 0
@@ -890,14 +890,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised t derivative of psi i.e.
-      // dt/psi * dpsi/dt at (i+1/2, j, k) - variable sign scalar version  
+      // dt/psi * dpsi/dt at (i+1/2, j, k) - variable sign scalar version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndt_psi(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
-        const ix_t &i, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<!opts::isset(opts, opts::iga) && opts::isset(opts, opts::abs)>::type* = 0
@@ -918,14 +918,14 @@ namespace libmpdataxx
           )
         );
       }
-      
+
       // nondimensionalised tx derivative of psi i.e.
       // dt/psi * dpsi/dt at (i+1/2, j, k) - infinite gauge version
       template<opts_t opts, int d, class arr_3d_t, class ix_t>
       inline auto ndt_psi(
-        const arr_3d_t &psi_np1, 
-        const arr_3d_t &psi_n, 
-        const ix_t &i, 
+        const arr_3d_t &psi_np1,
+        const arr_3d_t &psi_n,
+        const ix_t &i,
         const ix_t &j,
         const ix_t &k,
         typename std::enable_if<opts::isset(opts, opts::iga)>::type* = 0
@@ -938,4 +938,4 @@ namespace libmpdataxx
       }
     } // namespace mpdata
   } // namespace formulae
-} // namespcae libmpdataxx 
+} // namespcae libmpdataxx
diff --git a/libmpdata++/formulae/nabla_formulae.hpp b/libmpdata++/formulae/nabla_formulae.hpp
index 58a22742..c5523e5f 100644
--- a/libmpdata++/formulae/nabla_formulae.hpp
+++ b/libmpdata++/formulae/nabla_formulae.hpp
@@ -28,7 +28,7 @@ namespace libmpdataxx
       {
         return blitz::safeToReturn(
             (
-            x(i+1) - 
+            x(i+1) -
             x(i-1)
           ) / dx / 2
         );
@@ -45,12 +45,12 @@ namespace libmpdataxx
       {
         return blitz::safeToReturn(
             (
-              x(pi<d>(i+1, j)) - 
+              x(pi<d>(i+1, j)) -
               x(pi<d>(i-1, j))
             ) / dx / 2
         );
       }
-      
+
       // 3D version
       template <int d, class arg_t, typename real_t>
       inline auto grad(
@@ -59,16 +59,16 @@ namespace libmpdataxx
         const rng_t &j,
         const rng_t &k,
         const real_t dx
-      ) 
+      )
       {
         return blitz::safeToReturn(
             (
-            x(pi<d>(i+1, j, k)) - 
+            x(pi<d>(i+1, j, k)) -
             x(pi<d>(i-1, j, k))
           ) / dx / 2
         );
       }
-      
+
       template <class arg_t, typename real_t>
       inline auto grad_cmpct(
         const arg_t &x,
@@ -78,7 +78,7 @@ namespace libmpdataxx
       {
         return blitz::safeToReturn(
           (
-            x(i+1) - 
+            x(i+1) -
             x(i)
           ) / dx
         );
@@ -91,16 +91,16 @@ namespace libmpdataxx
         const rng_t &i,
         const rng_t &j,
         const real_t dx
-      ) 
+      )
       {
         return blitz::safeToReturn(
           (
-            x(pi<d>(i+1, j)) - 
+            x(pi<d>(i+1, j)) -
             x(pi<d>(i  , j))
           ) / dx
         );
       }
-      
+
       // 3D version
       template <int d, class arg_t, typename real_t>
       inline auto grad_cmpct(
@@ -113,14 +113,14 @@ namespace libmpdataxx
       {
         return blitz::safeToReturn(
           (
-            x(pi<d>(i+1, j, k)) - 
+            x(pi<d>(i+1, j, k)) -
             x(pi<d>(i, j, k))
           ) / dx
         );
       }
-      
+
       // helper function to calculate gradient components of a scalar field
-      
+
       // 1D version
       template <int nd, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
       inline void calc_grad(arrvec_t v, arr_t a, ijk_t ijk, dijk_t dijk, typename std::enable_if<nd == 1>::type* = 0)
@@ -144,7 +144,7 @@ namespace libmpdataxx
         v[1](ijk) = formulae::nabla::grad<1>(a, ijk[1], ijk[2], ijk[0], dijk[1]);
         v[2](ijk) = formulae::nabla::grad<2>(a, ijk[2], ijk[0], ijk[1], dijk[2]);
       }
-      
+
       // 2D version
       template <int nd, class arrvec_t, class arr_t, class ijk_t, class ijkm_t, class dijk_t>
       inline void calc_grad_cmpct(arrvec_t v, arr_t a, ijk_t ijk, ijkm_t ijkm, dijk_t dijk, typename std::enable_if<nd == 2>::type* = 0)
@@ -161,9 +161,9 @@ namespace libmpdataxx
         v[1](ijk[0], ijkm[1] + h, ijk[2]) = formulae::nabla::grad_cmpct<1>(a, ijkm[1], ijk[2], ijk[0], dijk[1]);
         v[2](ijk[0], ijk[1], ijkm[2] + h) = formulae::nabla::grad_cmpct<2>(a, ijkm[2], ijk[0], ijk[1], dijk[2]);
       }
-      
+
       // divergence
-      
+
       // 2D version
       template <int nd, class arrvec_t, class ijk_t, class dijk_t>
       inline auto div(
@@ -171,7 +171,7 @@ namespace libmpdataxx
         const ijk_t &ijk,
         const dijk_t dijk,
         typename std::enable_if<nd == 2>::type* = 0
-      ) 
+      )
       {
         return blitz::safeToReturn(
           (v[0](ijk[0]+1, ijk[1]) - v[0](ijk[0]-1, ijk[1])) / dijk[0] / 2
@@ -179,7 +179,7 @@ namespace libmpdataxx
           (v[1](ijk[0], ijk[1]+1) - v[1](ijk[0], ijk[1]-1)) / dijk[1] / 2
         );
       }
-      
+
       // 3D version
       template <int nd, class arrvec_t, class ijk_t, class dijk_t>
       inline auto div(
@@ -187,7 +187,7 @@ namespace libmpdataxx
         const ijk_t &ijk,
         const dijk_t dijk,
         typename std::enable_if<nd == 3>::type* = 0
-      ) 
+      )
       {
         return blitz::safeToReturn(
           (v[0](ijk[0]+1, ijk[1], ijk[2]) - v[0](ijk[0]-1, ijk[1], ijk[2])) / dijk[0] / 2
diff --git a/libmpdata++/formulae/stress_formulae.hpp b/libmpdata++/formulae/stress_formulae.hpp
index cfd4a9e9..710853e6 100644
--- a/libmpdata++/formulae/stress_formulae.hpp
+++ b/libmpdata++/formulae/stress_formulae.hpp
@@ -53,12 +53,12 @@ namespace libmpdataxx
         vg[7](ijk) = formulae::nabla::grad<2>(v[1], ijk[2], ijk[0], ijk[1], dijk[2]);
         vg[8](ijk) = formulae::nabla::grad<2>(v[2], ijk[2], ijk[0], ijk[1], dijk[2]);
       }
-      
+
       // calculates unique deformation tensor components
       // 2D version
       template <int nd, class arrvec_t, class ijk_t>
       inline void calc_deform(arrvec_t &tau,
-                              const arrvec_t &vg, 
+                              const arrvec_t &vg,
                               const ijk_t &ijk,
                               typename std::enable_if<nd == 2>::type* = 0)
       {
@@ -66,7 +66,7 @@ namespace libmpdataxx
         tau[1](ijk) = vg[1](ijk) + vg[2](ijk);
         tau[2](ijk) = 2 * vg[3](ijk);
       }
-      
+
       // 3D version
       template <int nd, class arrvec_t, class ijk_t>
       inline void calc_deform(arrvec_t &tau,
@@ -81,7 +81,7 @@ namespace libmpdataxx
         tau[4](ijk) = vg[5](ijk) + vg[7](ijk);
         tau[5](ijk) = 2 * vg[8](ijk);
       }
-      
+
       // calculate elements of stress tensor divergence
       // 2D version
       template <int nd, class arrvec_t, class ijk_t, class dijk_t>
@@ -97,7 +97,7 @@ namespace libmpdataxx
         sdiv[2](ijk) = formulae::nabla::grad<1>(tau[1], ijk[1], ijk[0], dijk[1]);
         sdiv[3](ijk) = formulae::nabla::grad<1>(tau[2], ijk[1], ijk[0], dijk[1]);
       }
-      
+
       // 3D version
       template <int nd, class arrvec_t, class ijk_t, class dijk_t>
       inline void calc_stress_div(arrvec_t &sdiv,
@@ -118,7 +118,7 @@ namespace libmpdataxx
         sdiv[7](ijk) = formulae::nabla::grad<2>(tau[4], ijk[2], ijk[0], ijk[1], dijk[2]);
         sdiv[8](ijk) = formulae::nabla::grad<2>(tau[5], ijk[2], ijk[0], ijk[1], dijk[2]);
       }
-      
+
 
       // add stress forces
       // 2D version
@@ -128,7 +128,7 @@ namespace libmpdataxx
         rhs[0](ijk) += coeff * (drv[0](ijk) + drv[2](ijk));
         rhs[1](ijk) += coeff * (drv[1](ijk) + drv[3](ijk));
       }
-      
+
       // 3D version
       template <int nd, class arrvec_t, class ijk_t, class real_t>
       inline void calc_stress_rhs(arrvec_t &rhs, const arrvec_t &drv, ijk_t &ijk, real_t coeff, typename std::enable_if<nd == 3>::type* = 0)
@@ -137,7 +137,7 @@ namespace libmpdataxx
         rhs[1](ijk) += coeff * (drv[1](ijk) + drv[4](ijk) + drv[7](ijk));
         rhs[2](ijk) += coeff * (drv[2](ijk) + drv[5](ijk) + drv[8](ijk));
       }
-      
+
       // Total deformation
       // 2D version
       template <int nd, class arrvec_t, class ijk_t>
@@ -145,23 +145,23 @@ namespace libmpdataxx
         const arrvec_t &tau,
         const ijk_t &ijk,
         typename std::enable_if<nd == 2>::type* = 0
-      ) 
-      { 
+      )
+      {
         return blitz::safeToReturn(
           pow2(tau[0](ijk)) / 2 + pow2(tau[1](ijk)) + pow2(tau[2](ijk)) / 2
         );
       }
-      
+
       // 3D version
       template <int nd, class arrvec_t, class ijk_t>
       inline auto calc_tdef_sq(
         const arrvec_t &tau,
         const ijk_t &ijk,
         typename std::enable_if<nd == 3>::type* = 0
-      ) 
+      )
       {
         return blitz::safeToReturn(
-          pow2(tau[0](ijk)) / 2 + 
+          pow2(tau[0](ijk)) / 2 +
           pow2(tau[1](ijk)) +
           pow2(tau[2](ijk)) +
           pow2(tau[3](ijk)) / 2 +
@@ -169,9 +169,9 @@ namespace libmpdataxx
           pow2(tau[5](ijk)) / 2
         );
       }
-      
+
       // Compact formulation
-      
+
       // surface stress
       // 2D version
       template <int nd, opts_t opts, class arr_t, class arrvec_t, class real_t, class ijk_t, class ijkm_t>
@@ -184,7 +184,7 @@ namespace libmpdataxx
                                   typename std::enable_if<nd == 2>::type* = 0)
       {
         auto zro = rng_t(0, 0);
-        tau[0](ijkm[0] + h, zro) = cdrag / 8 * 
+        tau[0](ijkm[0] + h, zro) = cdrag / 8 *
                                    abs((v[0](ijkm[0] + 1, zro) + v[0](ijkm[0], zro))) *
                                    (v[0](ijkm[0] + 1, zro) + v[0](ijkm[0], zro)) *
                                    (  G<opts, 0>(rho, ijkm[0] + 1, zro)
@@ -219,7 +219,7 @@ namespace libmpdataxx
                                            (  G<opts, 0>(rho, ijk[0], ijkm[1] + 1, zro)
                                             + G<opts, 0>(rho, ijk[0], ijkm[1]    , zro) );
       }
-      
+
       // velocity divergence
       // 2D version
       template <int nd, class arr_t, class arrvec_t, class ijk_t, class dijk_t>
@@ -234,7 +234,7 @@ namespace libmpdataxx
                                     (rho(ijk[0], ijk[1] + 1) - rho(ijk[0], ijk[1])) /
                                     (dijk[1] * (rho(ijk[0], ijk[1] + 1) + rho(ijk[0], ijk[1])));
       }
-      
+
       // 3D version
       template <int nd, class arr_t, class arrvec_t, class ijk_t, class dijk_t>
       inline void calc_vip_div_cmpct(arr_t &div,
@@ -264,10 +264,10 @@ namespace libmpdataxx
         =
         2 * (
           (v[0](ijkm[0] + 1, ijk[1]) - v[0](ijkm[0], ijk[1])) / dijk[0]
-          - fconst<arr_t>(0.25 / 2) * ( div_v(ijkm[0], ijk[1] + h) + div_v(ijkm[0], ijk[1] - h) 
+          - fconst<arr_t>(0.25 / 2) * ( div_v(ijkm[0], ijk[1] + h) + div_v(ijkm[0], ijk[1] - h)
                                       + div_v(ijkm[0] + 1, ijk[1] + h) + div_v(ijkm[0] + 1, ijk[1] - h))
         );
-        
+
         tau[1](ijk[0], ijkm[1] + h)
         =
         2 * (
@@ -279,11 +279,11 @@ namespace libmpdataxx
         =
         fconst<arr_t>(0.5) *
         (
-          ( v[0](ijkm[0], ijkm[1] + 1) - v[0](ijkm[0], ijkm[1]) 
+          ( v[0](ijkm[0], ijkm[1] + 1) - v[0](ijkm[0], ijkm[1])
           + v[0](ijkm[0] + 1, ijkm[1] + 1) - v[0](ijkm[0] + 1, ijkm[1])
           ) / dijk[1]
           +
-          ( v[1](ijkm[0] + 1, ijkm[1]) - v[1](ijkm[0], ijkm[1]) 
+          ( v[1](ijkm[0] + 1, ijkm[1]) - v[1](ijkm[0], ijkm[1])
           + v[1](ijkm[0] + 1, ijkm[1] + 1) - v[1](ijkm[0], ijkm[1] + 1)
           ) / dijk[0]
         );
@@ -306,7 +306,7 @@ namespace libmpdataxx
           - fconst<arr_t>(0.25 / 3) * ( div_v(ijkm[0], ijk[1], ijk[2] + h) + div_v(ijkm[0], ijk[1], ijk[2] - h)
                                       + div_v(ijkm[0] + 1, ijk[1], ijk[2] + h) + div_v(ijkm[0] + 1, ijk[1], ijk[2] - h))
         );
-        
+
         tau[1](ijk[0], ijkm[1] + h, ijk[2])
         =
         2 * (
@@ -314,27 +314,27 @@ namespace libmpdataxx
           - fconst<arr_t>(0.25 / 3) * ( div_v(ijk[0], ijkm[1], ijk[2] + h) + div_v(ijk[0], ijkm[1], ijk[2] - h)
                                     + div_v(ijk[0], ijkm[1] + 1, ijk[2] + h) + div_v(ijk[0], ijkm[1] + 1, ijk[2] - h))
         );
-        
+
         tau[2](ijk[0], ijk[1], ijkm[2] + h)
         =
         2 * (
           (v[2](ijk[0], ijk[1], ijkm[2] + 1) - v[2](ijk[0], ijk[1], ijkm[2])) / dijk[2]
-          - fconst<arr_t>(1.0 / 3)  * div_v(ijk[0], ijk[1], ijkm[2] + h) 
+          - fconst<arr_t>(1.0 / 3)  * div_v(ijk[0], ijk[1], ijkm[2] + h)
         );
 
         tau[3](ijkm[0] + h, ijkm[1] + h, ijk[2])
         =
         fconst<arr_t>(0.5) *
         (
-          ( v[0](ijkm[0], ijkm[1] + 1, ijk[2]) - v[0](ijkm[0], ijkm[1], ijk[2]) 
+          ( v[0](ijkm[0], ijkm[1] + 1, ijk[2]) - v[0](ijkm[0], ijkm[1], ijk[2])
           + v[0](ijkm[0] + 1, ijkm[1] + 1, ijk[2]) - v[0](ijkm[0] + 1, ijkm[1], ijk[2])
           ) / dijk[1]
           +
-          ( v[1](ijkm[0] + 1, ijkm[1], ijk[2]) - v[1](ijkm[0], ijkm[1], ijk[2]) 
+          ( v[1](ijkm[0] + 1, ijkm[1], ijk[2]) - v[1](ijkm[0], ijkm[1], ijk[2])
           + v[1](ijkm[0] + 1, ijkm[1] + 1, ijk[2]) - v[1](ijkm[0], ijkm[1] + 1, ijk[2])
           ) / dijk[0]
         );
-        
+
         tau[4](ijkm[0] + h, ijk[1], ijkm[2] + h)
         =
         fconst<arr_t>(0.5) *
@@ -343,11 +343,11 @@ namespace libmpdataxx
           + v[0](ijkm[0] + 1, ijk[1], ijkm[2] + 1) - v[0](ijkm[0] + 1, ijk[1], ijkm[2])
           ) / dijk[2]
           +
-          ( v[2](ijkm[0] + 1, ijk[1], ijkm[2]) - v[2](ijkm[0], ijk[1], ijkm[2]) 
+          ( v[2](ijkm[0] + 1, ijk[1], ijkm[2]) - v[2](ijkm[0], ijk[1], ijkm[2])
           + v[2](ijkm[0] + 1, ijk[1], ijkm[2] + 1) - v[2](ijkm[0], ijk[1], ijkm[2] + 1)
           ) / dijk[0]
         );
-        
+
         tau[5](ijk[0], ijkm[1] + h, ijkm[2] + h)
         =
         fconst<arr_t>(0.5) *
@@ -356,12 +356,12 @@ namespace libmpdataxx
           + v[1](ijk[0], ijkm[1] + 1, ijkm[2] + 1) - v[1](ijk[0], ijkm[1] + 1, ijkm[2])
           ) / dijk[2]
           +
-          ( v[2](ijk[0], ijkm[1] + 1, ijkm[2]) - v[2](ijk[0], ijkm[1], ijkm[2]) 
+          ( v[2](ijk[0], ijkm[1] + 1, ijkm[2]) - v[2](ijk[0], ijkm[1], ijkm[2])
           + v[2](ijk[0], ijkm[1] + 1, ijkm[2] + 1) - v[2](ijk[0], ijkm[1], ijkm[2] + 1)
           ) / dijk[1]
         );
       }
-      
+
       // Total deformation
       // 2D version
       template <int nd, class arrvec_t, class ijk_t>
@@ -369,10 +369,10 @@ namespace libmpdataxx
         const arrvec_t &tau,
         const ijk_t &ijk,
         typename std::enable_if<nd == 2>::type* = 0
-      ) 
+      )
       {
         return blitz::safeToReturn(
-          // one half taken as an average 
+          // one half taken as an average
           (
             pow2(tau[0](ijk[0] + h, ijk[1])) + pow2(tau[0](ijk[0] - h, ijk[1]))
           + pow2(tau[1](ijk[0], ijk[1] + h)) + pow2(tau[1](ijk[0], ijk[1] - h))
@@ -399,7 +399,7 @@ namespace libmpdataxx
       )
       {
         return blitz::safeToReturn(
-          // one half taken as an average 
+          // one half taken as an average
           (
             pow2(tau[0](ijk[0] + h, ijk[1], ijk[2])) + pow2(tau[0](ijk[0] - h, ijk[1], ijk[2]))
           + pow2(tau[1](ijk[0], ijk[1] + h, ijk[2])) + pow2(tau[1](ijk[0], ijk[1] - h, ijk[2]))
@@ -426,7 +426,7 @@ namespace libmpdataxx
           ) / 4
         );
       }
-     
+
       // multiplication of compact vector components by constant molecular viscosity
       // 2D version
       template <int nd, class arrvec_t, class real_t, class ijk_t>
@@ -461,12 +461,12 @@ namespace libmpdataxx
                                       const ijk_t &ijk,
                                       typename std::enable_if<nd == 2>::type* = 0)
       {
-        av[0](ijk[0] + h, ijk[1]) *= coeff * 
+        av[0](ijk[0] + h, ijk[1]) *= coeff *
                            real_t(0.5) * (k_m(ijk[0] + 1, ijk[1]) + k_m(ijk[0], ijk[1])) *
                            real_t(0.5) * (G<opts, 0>(rho, ijk[0] + 1, ijk[1]) + G<opts, 0>(rho, ijk[0], ijk[1]));
-        
+
         av[1](ijk[0], ijk[1] + h) *= coeff *
-                           real_t(0.5) * (k_m(ijk[0], ijk[1] + 1) + k_m(ijk[0], ijk[1])) * 
+                           real_t(0.5) * (k_m(ijk[0], ijk[1] + 1) + k_m(ijk[0], ijk[1])) *
                            real_t(0.5) * (G<opts, 0>(rho, ijk[0], ijk[1] + 1) + G<opts, 0>(rho, ijk[0], ijk[1]));
       }
 
@@ -482,16 +482,16 @@ namespace libmpdataxx
         av[0](ijk[0] + h, ijk[1], ijk[2]) *= coeff *
                            real_t(0.5) * (k_m(ijk[0] + 1, ijk[1], ijk[2]) + k_m(ijk[0], ijk[1], ijk[2])) *
                            real_t(0.5) * (G<opts, 0>(rho, ijk[0] + 1, ijk[1], ijk[2]) + G<opts, 0>(rho,ijk[0], ijk[1], ijk[2]));
-        
+
         av[1](ijk[0], ijk[1] + h, ijk[2]) *= coeff *
                            real_t(0.5) * (k_m(ijk[0], ijk[1] + 1, ijk[2]) + k_m(ijk[0], ijk[1], ijk[2])) *
                            real_t(0.5) * (G<opts, 0>(rho, ijk[0], ijk[1] + 1, ijk[2]) + G<opts, 0>(rho, ijk[0], ijk[1], ijk[2]));
-        
+
         av[2](ijk[0], ijk[1], ijk[2] + h) *= coeff *
                            real_t(0.5) * (k_m(ijk[0], ijk[1], ijk[2] + 1) + k_m(ijk[0], ijk[1], ijk[2])) *
                            real_t(0.5) * (G<opts, 0>(rho, ijk[0], ijk[1], ijk[2] + 1) + G<opts, 0>(rho, ijk[0], ijk[1], ijk[2]));
       }
-      
+
       // multiplication of compact tensor components by constant molecular viscosity
       // 2D version
       template <int nd, class arrvec_t, class real_t, class ijk_t>
@@ -527,7 +527,7 @@ namespace libmpdataxx
                                       typename std::enable_if<nd == 2>::type* = 0)
       {
         multiply_vctr_cmpct<nd, opts>(av, coeff, k_m, rho, ijk);
-        av[2](ijk[0] + h, ijk[1] + h) *= coeff * 
+        av[2](ijk[0] + h, ijk[1] + h) *= coeff *
                                          real_t(0.25) * ( k_m(ijk[0] + 1, ijk[1]    )
                                                         + k_m(ijk[0]    , ijk[1]    )
                                                         + k_m(ijk[0] + 1, ijk[1] + 1)
@@ -561,7 +561,7 @@ namespace libmpdataxx
                                                                 + G<opts, 0>(rho, ijk[0] + 1, ijk[1] + 1, ijk[2])
                                                                 + G<opts, 0>(rho, ijk[0]    , ijk[1] + 1, ijk[2])
                                                                 );
-        
+
         av[4](ijk[0] + h, ijk[1], ijk[2] + h) *= coeff *
                                                  real_t(0.25) * ( k_m(ijk[0] + 1, ijk[1], ijk[2]    )
                                                                 + k_m(ijk[0]    , ijk[1], ijk[2]    )
@@ -596,7 +596,7 @@ namespace libmpdataxx
         const ijk_t &ijk,
         const dijk_t dijk,
         typename std::enable_if<nd == 2>::type* = 0
-      ) 
+      )
       {
         return blitz::safeToReturn(
           ( (f[0](ijk[0]+h, ijk[1]) - f[0](ijk[0]-h, ijk[1])) / dijk[0]
@@ -605,7 +605,7 @@ namespace libmpdataxx
           ) / G<opts>(rho, ijk)
         );
       }
-      
+
       // 3D version
       template <int nd, opts_t opts, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
       inline auto flux_div_cmpct(
@@ -614,7 +614,7 @@ namespace libmpdataxx
         const ijk_t &ijk,
         const dijk_t dijk,
         typename std::enable_if<nd == 3>::type* = 0
-      ) 
+      )
       {
         return blitz::safeToReturn(
           ( (f[0](ijk[0]+h, ijk[1], ijk[2]) - f[0](ijk[0]-h, ijk[1], ijk[2])) / dijk[0]
@@ -625,7 +625,7 @@ namespace libmpdataxx
           ) / G<opts>(rho, ijk)
         );
       }
-     
+
       // add stress forces
       // 2D version
       template <int nd, opts_t opts, class arrvec_t, class arr_t, class ijk_t, class dijk_t, class real_t>
@@ -642,17 +642,17 @@ namespace libmpdataxx
         coeff *
         (
           (tau[0](ijk[0] + h, ijk[1]) - tau[0](ijk[0] - h, ijk[1])) / dijk[0]
-          + real_t(0.5) * 
+          + real_t(0.5) *
           ( tau[2](ijk[0] + h, ijk[1] + h) - tau[2](ijk[0] + h, ijk[1] - h)
           + tau[2](ijk[0] - h, ijk[1] + h) - tau[2](ijk[0] - h, ijk[1] - h)
           ) / dijk[1]
         ) / G<opts>(rho, ijk);
-        
+
         rhs[1](ijk)
         +=
         coeff *
         (
-          real_t(0.5) * 
+          real_t(0.5) *
           ( tau[2](ijk[0] + h, ijk[1] + h) - tau[2](ijk[0] - h, ijk[1] + h)
           + tau[2](ijk[0] + h, ijk[1] - h) - tau[2](ijk[0] - h, ijk[1] - h)
           ) / dijk[0]
@@ -676,41 +676,41 @@ namespace libmpdataxx
         coeff *
         (
           (tau[0](ijk[0] + h, ijk[1], ijk[2]) - tau[0](ijk[0] - h, ijk[1], ijk[2])) / dijk[0]
-          + real_t(0.5) * 
+          + real_t(0.5) *
           ( tau[3](ijk[0] + h, ijk[1] + h, ijk[2]) - tau[3](ijk[0] + h, ijk[1] - h, ijk[2])
           + tau[3](ijk[0] - h, ijk[1] + h, ijk[2]) - tau[3](ijk[0] - h, ijk[1] - h, ijk[2])
           ) / dijk[1]
-          + real_t(0.5) * 
+          + real_t(0.5) *
           ( tau[4](ijk[0] + h, ijk[1], ijk[2] + h) - tau[4](ijk[0] + h, ijk[1], ijk[2] - h)
           + tau[4](ijk[0] - h, ijk[1], ijk[2] + h) - tau[4](ijk[0] - h, ijk[1], ijk[2] - h)
           ) / dijk[2]
         ) / G<opts>(rho, ijk);
-        
+
         rhs[1](ijk)
         +=
         coeff *
         (
-          real_t(0.5) * 
+          real_t(0.5) *
           ( tau[3](ijk[0] + h, ijk[1] + h, ijk[2]) - tau[3](ijk[0] - h, ijk[1] + h, ijk[2])
           + tau[3](ijk[0] + h, ijk[1] - h, ijk[2]) - tau[3](ijk[0] - h, ijk[1] - h, ijk[2])
           ) / dijk[0]
           +
           (tau[1](ijk[0], ijk[1] + h, ijk[2]) - tau[1](ijk[0], ijk[1] - h, ijk[2])) / dijk[1]
-          + real_t(0.5) * 
+          + real_t(0.5) *
           ( tau[5](ijk[0], ijk[1] + h, ijk[2] + h) - tau[5](ijk[0], ijk[1] + h, ijk[2] - h)
           + tau[5](ijk[0], ijk[1] - h, ijk[2] + h) - tau[5](ijk[0], ijk[1] - h, ijk[2] - h)
           ) / dijk[2]
         ) / G<opts>(rho, ijk);
-        
+
         rhs[2](ijk)
         +=
         coeff *
         (
-          real_t(0.5) * 
+          real_t(0.5) *
           ( tau[4](ijk[0] + h, ijk[1], ijk[2] + h) - tau[4](ijk[0] - h, ijk[1], ijk[2] + h)
           + tau[4](ijk[0] + h, ijk[1], ijk[2] - h) - tau[4](ijk[0] - h, ijk[1], ijk[2] - h)
           ) / dijk[0]
-          + real_t(0.5) * 
+          + real_t(0.5) *
           ( tau[5](ijk[0], ijk[1] + h, ijk[2] + h) - tau[5](ijk[0], ijk[1] - h, ijk[2] + h)
           + tau[5](ijk[0], ijk[1] + h, ijk[2] - h) - tau[5](ijk[0], ijk[1] - h, ijk[2] - h)
           ) / dijk[1]
@@ -727,7 +727,7 @@ namespace libmpdataxx
         const arg_t &x,
         const rng_t &i,
         const rng_t &j
-      ) 
+      )
       {
         return blitz::safeToReturn(
           x(idxperm::pi<d>(i + 1, j)) + 2 * x(idxperm::pi<d>(i, j)) + x(idxperm::pi<d>(i - 1, j))
@@ -760,7 +760,7 @@ namespace libmpdataxx
         const rng_t &i,
         const rng_t &j,
         const rng_t &k
-      ) 
+      )
       {
         return blitz::safeToReturn(
           x(idxperm::pi<d>(i+1, j, k)) + 2 * x(idxperm::pi<d>(i, j, k)) + x(idxperm::pi<d>(i - 1, j, k))
diff --git a/libmpdata++/kahan_reduction.hpp b/libmpdata++/kahan_reduction.hpp
index 551cb191..2e89c708 100644
--- a/libmpdata++/kahan_reduction.hpp
+++ b/libmpdata++/kahan_reduction.hpp
@@ -13,7 +13,7 @@
 namespace blitz
 {
   template<typename P_sourcetype, typename P_resulttype = BZ_SUMTYPE(P_sourcetype)>
-  class ReduceKahanSum 
+  class ReduceKahanSum
   {
     public:
 
@@ -23,33 +23,33 @@ namespace blitz
 
     static const bool needIndex = false, needInit = false;
 
-    ReduceKahanSum() { } 
+    ReduceKahanSum() { }
 
 #pragma GCC push_options
 #pragma GCC optimize ("O3") // assuming -Ofast could optimise out the algorithm
-    bool operator()(const T_sourcetype& x, const int=0) const 
-    { 
+    bool operator()(const T_sourcetype& x, const int=0) const
+    {
 #if defined(__FAST_MATH__) && defined(__llvm__)
       volatile // without volatile clang optimises the algorithm out with -Ofast
 #endif
-      T_resulttype t, y; 
+      T_resulttype t, y;
       y = x - c_;
       t = sum_ + y;
       c_ = (t - sum_) - y;
       sum_ = t;
       return true;
-    }   
+    }
 #pragma GCC pop_options
 
     T_resulttype result(const int) const { return sum_; }
 
-    void reset() const 
-    { 
-      sum_ = c_ = zero(T_resulttype()); 
+    void reset() const
+    {
+      sum_ = c_ = zero(T_resulttype());
     }
- 
+
     static const char* name() { return "sum"; }
- 
+
     protected:
 
     mutable T_resulttype sum_, c_;
diff --git a/libmpdata++/opts.hpp b/libmpdata++/opts.hpp
index bd93ea67..35a89ea2 100644
--- a/libmpdata++/opts.hpp
+++ b/libmpdata++/opts.hpp
@@ -36,7 +36,7 @@ namespace libmpdataxx
       fct = opts::bit(0), // flux-corrected transport
       abs = opts::bit(1), // use the abs() trick to handle variable-sign signal
       tot = opts::bit(2), // third-order accuracy terms
-      pfc = opts::bit(3), // use conditional statements like frac=where(den!=0,nom/den,0) instead of frac=nom/(den+eps) in psi-fraction factors 
+      pfc = opts::bit(3), // use conditional statements like frac=where(den!=0,nom/den,0) instead of frac=nom/(den+eps) in psi-fraction factors
       npa = opts::bit(4), // use nprt=(x-abs(x))/2 instead of nprt=min(0,x), and analogous formulae for pprt
       iga = opts::bit(5), // infinite-gauge option
       nug = opts::bit(6), // non-unit G (default G = 1) - see Smolarkiewicz 2006 eq (25) and discussion below for info on G
diff --git a/libmpdata++/output/detail/output_common.hpp b/libmpdata++/output/detail/output_common.hpp
index 31c8d82f..47920259 100644
--- a/libmpdata++/output/detail/output_common.hpp
+++ b/libmpdata++/output/detail/output_common.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -15,7 +15,7 @@ namespace libmpdataxx
 {
   namespace output
   {
-    namespace detail 
+    namespace detail
     {
       template <class solver_t>
       class output_common : public solver_t
@@ -38,7 +38,7 @@ namespace libmpdataxx
 
         virtual void record(const int var) {}
         virtual void start(const typename parent_t::advance_arg_t nt) {}
-        
+
         typename parent_t::arr_t out_data(const int var)
         {
           return this->var_dt ? intrp_vars[var] : this->mem->advectee(var);
@@ -55,7 +55,7 @@ namespace libmpdataxx
             this->mem->barrier();
           }
 
-          if (this->rank == 0) 
+          if (this->rank == 0)
           {
             record_time = this->time;
             start(nt);
@@ -72,7 +72,7 @@ namespace libmpdataxx
         void hook_ante_step()
         {
           parent_t::hook_ante_step();
-         
+
           if (this->var_dt)
           {
             static_assert(parent_t::ct_params_t_::out_intrp_ord == 1 ||
@@ -85,7 +85,7 @@ namespace libmpdataxx
             int curr_idx = std::floor(this->time / outfreq);
 
             do_record_cnt = (do_record_cnt > 0 ? do_record_cnt - 1 : 0);
-            if (next_idx > curr_idx || do_record_cnt > 0) 
+            if (next_idx > curr_idx || do_record_cnt > 0)
             {
               if (do_record_cnt == 0)
               {
@@ -101,7 +101,7 @@ namespace libmpdataxx
           }
         }
 
-        void hook_post_step() 
+        void hook_post_step()
         {
           parent_t::hook_post_step();
 
@@ -133,10 +133,10 @@ namespace libmpdataxx
                     const auto & y0 = intrp_vars[v.first](this->ijk);
                     const auto & y1 = intrp_vars[v.first + parent_t::n_eqns](this->ijk);
                     const auto & y2 = this->mem->advectee(v.first)(this->ijk);
-                    
+
                     intrp_vars[v.first](this->ijk) = y0 +
                                                      (y1 - y0) / (t1 - t0) * (t - t0) +
-                                                     ( (y2 - y1) / ((t2 - t1) * (t2 - t0)) 
+                                                     ( (y2 - y1) / ((t2 - t1) * (t2 - t0))
                                                      - (y1 - y0) / ((t1 - t0) * (t2 - t0)) ) * (t - t0) * (t - t1);
                     break;
                   }
@@ -162,14 +162,14 @@ namespace libmpdataxx
               }
             }
           }
-          
+
           this->mem->barrier(); // waiting for the output to be finished
         }
 
         public:
 
-        struct rt_params_t : parent_t::rt_params_t 
-        { 
+        struct rt_params_t : parent_t::rt_params_t
+        {
           typename parent_t::advance_arg_t outfreq = 1;
           int outwindow = 1;
           std::map<int, info_t> outvars;
@@ -183,7 +183,7 @@ namespace libmpdataxx
           const rt_params_t &p
         ) :
           parent_t(args, p),
-          outfreq(p.outfreq), 
+          outfreq(p.outfreq),
           outwindow(p.outwindow),
           outvars(p.outvars),
           outdir(p.outdir),
@@ -193,9 +193,9 @@ namespace libmpdataxx
           if (this->outvars.size() == 0 && parent_t::n_eqns == 1)
             outvars = {{0, {"", ""}}};
 
-          
+
           // assign 1 to dt, di, dj, dk for output purposes if they are not defined by the user
-          for (auto ref : 
+          for (auto ref :
                 std::vector<std::reference_wrapper<typename parent_t::real_t>>{this->dt, this->di, this->dj, this->dk})
           {
             if (ref.get() == 0)
diff --git a/libmpdata++/output/detail/xdmf_writer.hpp b/libmpdata++/output/detail/xdmf_writer.hpp
index a3a6db29..173ebd4a 100644
--- a/libmpdata++/output/detail/xdmf_writer.hpp
+++ b/libmpdata++/output/detail/xdmf_writer.hpp
@@ -130,7 +130,7 @@ namespace libmpdataxx
             attrs.insert(make_attribute(n, dimensions + 1));
           }
         }
-        
+
 
         void add_attribute(const std::string& name,
                                  const std::string& hdf_name,
@@ -171,7 +171,7 @@ namespace libmpdataxx
 
           for (auto a : attrs)
             a.add(grid_node);
-          
+
           for (auto ca : c_attrs)
             ca.add(grid_node);
 
diff --git a/libmpdata++/output/gnuplot.hpp b/libmpdata++/output/gnuplot.hpp
index a9e3193f..2b02d2ed 100644
--- a/libmpdata++/output/gnuplot.hpp
+++ b/libmpdata++/output/gnuplot.hpp
@@ -1,9 +1,9 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
- * @brief a thin wrapper around the gnuplot-iostream library intended 
+ * @brief a thin wrapper around the gnuplot-iostream library intended
  *   for debugging/demonstration purposes only (used in many tests)
  */
 
@@ -25,7 +25,7 @@ namespace libmpdataxx
       protected:
 
       using output_t = gnuplot<solver_t>;
-      
+
       private:
 
       using parent_t = detail::output_common<solver_t>;
@@ -42,7 +42,7 @@ namespace libmpdataxx
         // fixed instead of scientific to allow automatic comparison of test results for values near zero
 
         // some common 1D/2D settings
-        *gp 
+        *gp
            << (p.gnuplot_grid ? "" : "un") << "set grid\n"
            << "set border " << p.gnuplot_border << "\n"
            << "set palette " << p.gnuplot_palette << "\n"
@@ -56,19 +56,19 @@ namespace libmpdataxx
            << "set termoption font \"," << p.gnuplot_fontsize << "\"\n"
            << "set termoption solid\n"
         ;
-        if (p.gnuplot_xrange == "[*:*]") 
-           *gp << "set xrange [" 
-               << this->mem->grid_size[0].first() 
-               << ":" 
+        if (p.gnuplot_xrange == "[*:*]")
+           *gp << "set xrange ["
+               << this->mem->grid_size[0].first()
+               << ":"
                << this->mem->grid_size[0].last()
                << "]\n";
-        else 
+        else
            *gp << "set xrange " << p.gnuplot_xrange << "\n";
 
         // 1D settings
         if (parent_t::n_dims == 1) // known at compile time
         {
-          if (p.gnuplot_command == "splot") 
+          if (p.gnuplot_command == "splot")
           {
             *gp << "set yrange [0:" << nt << "]\n"
                 << "set xtics out\n"
@@ -81,19 +81,19 @@ namespace libmpdataxx
 
             if (p.gnuplot_ylabel == "") *gp << "set ylabel 't/dt'\n";
           }
-          else if (p.gnuplot_command == "plot") 
+          else if (p.gnuplot_command == "plot")
           {
             if (p.gnuplot_with != "histeps") throw std::runtime_error("histeps is the only meaningfull style for 1D plots");
             *gp << "set yrange " << p.gnuplot_yrange << "\n";
-          } 
+          }
           else throw std::runtime_error("gnuplot_command must equal plot or splot");
-          
-          *gp 
+
+          *gp
              << "set output '" << p.gnuplot_output;
-          if (this->mem->distmem.size() > 1) 
+          if (this->mem->distmem.size() > 1)
             *gp << "." << this->mem->distmem.rank();
           *gp <<"'\n";
-          *gp 
+          *gp
              << "set title '" << p.gnuplot_title << "'\n"
              << p.gnuplot_command << " 1/0 notitle" // for the comma below :)
           ;
@@ -103,7 +103,7 @@ namespace libmpdataxx
             for (const auto &v : this->outvars)
             {
               *gp << ", '-'";
-              if (p.gnuplot_command == "splot") 
+              if (p.gnuplot_command == "splot")
               {
                 *gp << " using (((int($0)+1)/2+(int($0)-1)/2)*.5";
                 if (this->mem->distmem.size() > 1)
@@ -111,13 +111,13 @@ namespace libmpdataxx
                 *gp << "):(" << t << "):1";
               }
               *gp << " with " << p.gnuplot_with; // TODO: assert histeps -> emulation
- 
+
               *gp << " lt ";
               if (this->outvars.size() == 1) *gp <<  p.gnuplot_lt;
               else *gp << v.first + 1; // +1 so that the "0" lt is not used (gives dashed lines)
 
               *gp << (
-                t == 0 
+                t == 0
                 ? std::string(" title '") + v.second.name + "'"
                 : std::string(" notitle")
               );
@@ -129,12 +129,12 @@ namespace libmpdataxx
         // 2D settings
         if (parent_t::n_dims == 2) // known at compile time
         {
-          if (p.gnuplot_yrange == "[*:*]") 
+          if (p.gnuplot_yrange == "[*:*]")
              *gp << "set yrange [0:" << this->mem->advectee(0).extent(1)-1 << "]\n";
-          else 
+          else
              *gp << "set yrange " << p.gnuplot_yrange << "\n";
 
-          *gp 
+          *gp
              << "set cbrange " << p.gnuplot_cbrange << "\n"
              << "set xtics out\n"
              << "set ytics out\n"
@@ -144,7 +144,7 @@ namespace libmpdataxx
 
           if (p.gnuplot_contour)
           {
-            *gp 
+            *gp
                << "unset clabel\n"
                << "set cntrparam " << p.gnuplot_cntrparam << "\n";
           }
@@ -155,7 +155,7 @@ namespace libmpdataxx
       {
         gp.reset();
       }
- 
+
       // helper constructs to make it compilable for both 1D and 2D versions
       std::string binfmt(blitz::Array<typename parent_t::real_t, 1>) { throw std::logic_error("binfmt() only for 2D!"); }
       std::string binfmt(blitz::Array<typename parent_t::real_t, 2> a) { return gp->binfmt(a.transpose(blitz::secondDim, blitz::firstDim)) + " scan=yx "; }
@@ -163,13 +163,13 @@ namespace libmpdataxx
       void record(const int var)
       {
         if (parent_t::n_dims == 1) // known at compile time
-        { 
-          if (p.gnuplot_command == "splot") 
+        {
+          if (p.gnuplot_command == "splot")
           {
             // emulating histeps
-            decltype(this->mem->advectee(var)) 
+            decltype(this->mem->advectee(var))
               tmp(2 * this->mem->grid_size[0].length());
-            for (int i = 0; i < tmp.extent(0); ++i) 
+            for (int i = 0; i < tmp.extent(0); ++i)
               tmp(i) = this->mem->advectee(var)(this->mem->grid_size[0].first() + i/2);
             gp->send(tmp);
           }
@@ -181,10 +181,10 @@ namespace libmpdataxx
           {
             std::ostringstream tmp;
             tmp << "set output '" << boost::format(p.gnuplot_output)  % this->outvars[var].name  % this->timestep;
-            if (this->mem->distmem.size() > 1) 
+            if (this->mem->distmem.size() > 1)
               tmp << "." << this->mem->distmem.rank();
             tmp << "'\n";
-            if (p.gnuplot_title == "notitle") 
+            if (p.gnuplot_title == "notitle")
               tmp << "set title ''\n";
             else
               tmp << "set title '"<< this->outvars[var].name << "  (" // TODO: handle the option
@@ -198,7 +198,7 @@ namespace libmpdataxx
             // ox = oy = .5; // old: x = (i+.5) * dx
             // ox = oy = 0;  // new: x =   i    * dx
             ox = this->mem->grid_size[0].first();
-            oy = 0; 
+            oy = 0;
             auto data = this->mem->advectee(var).copy();
             data.reindexSelf({0,0});
             if (imagebg)
@@ -207,12 +207,12 @@ namespace libmpdataxx
               int count = sscanf(p.gnuplot_zrange.c_str(), "[%g:%g]", &zmin, &zmax);
               if (count != 2) zmin = 0;
               *gp << " '-' binary " << binfmt(data)
-                  << " origin=(" << ox << "," << oy << "," << zmin << ")"     
+                  << " origin=(" << ox << "," << oy << "," << zmin << ")"
                   << " with image failsafe notitle,";
             }
-            *gp << " '-'" 
-                << " binary" << binfmt(data) 
-                << " origin=(" << ox << "," << oy << ",0)" 
+            *gp << " '-'"
+                << " binary" << binfmt(data)
+                << " origin=(" << ox << "," << oy << ",0)"
                 << " with " << p.gnuplot_with << " lt " << p.gnuplot_lt << " notitle\n";
             data = blitz::rint(data * pow(10, precision)) * pow(10, -precision);
             gp->sendBinary(data);
@@ -223,31 +223,31 @@ namespace libmpdataxx
 
       public:
 
-      struct rt_params_t : parent_t::rt_params_t 
-      { 
-        std::string 
+      struct rt_params_t : parent_t::rt_params_t
+      {
+        std::string
           gnuplot_output = std::string("out.svg"),
-          gnuplot_with = ( 
-            parent_t::n_dims == 2 
+          gnuplot_with = (
+            parent_t::n_dims == 2
               ? std::string("image failsafe") // 2D
               : std::string("histeps")
           ),
           gnuplot_command = std::string("splot"),
           gnuplot_xlabel = std::string("x/dx"),
           gnuplot_ylabel = (
-            parent_t::n_dims == 2 
+            parent_t::n_dims == 2
               ? std::string("y/dy") // 2D
               : std::string("")  // 1D
           ),
           gnuplot_size = (
-            parent_t::n_dims == 2 
+            parent_t::n_dims == 2
               ? std::string("square") // 2D
               : std::string("noratio")  // 1D
           ),
-          gnuplot_fontsize = std::string("15"), 
-          gnuplot_ticslevel = std::string("0"), 
-          gnuplot_view = std::string(""), 
-          gnuplot_title = std::string(""), 
+          gnuplot_fontsize = std::string("15"),
+          gnuplot_ticslevel = std::string("0"),
+          gnuplot_view = std::string(""),
+          gnuplot_title = std::string(""),
           gnuplot_zrange = std::string("[*:*]"),
           gnuplot_yrange = std::string("[*:*]"),
           gnuplot_xrange = std::string("[*:*]"),
@@ -259,7 +259,7 @@ namespace libmpdataxx
           gnuplot_term = std::string("svg dynamic"),
           gnuplot_palette = std::string(""),
           gnuplot_cbtics = std::string("");
-        bool 
+        bool
           gnuplot_contour = false,
           gnuplot_grid = true,
           gnuplot_surface = true;
@@ -274,9 +274,9 @@ namespace libmpdataxx
         const rt_params_t &p
       ) : parent_t(args, p), p(p)
       {
-        if (!this->outdir.empty()) 
+        if (!this->outdir.empty())
           this->p.gnuplot_output = this->outdir + "/" + p.gnuplot_output; // TODO: get rid of gnuplot_output
       }
-    }; 
+    };
   } // namespace output
 } // namespace libmpdataxx
diff --git a/libmpdata++/output/hdf5.hpp b/libmpdata++/output/hdf5.hpp
index 3018d589..553cedef 100644
--- a/libmpdata++/output/hdf5.hpp
+++ b/libmpdata++/output/hdf5.hpp
@@ -74,20 +74,20 @@ namespace libmpdataxx
           boost::filesystem::create_directory(this->outdir);
         }
 
- 
+
         {
           // creating the const file
 #if defined(USE_MPI)
           this->mem->distmem.barrier();
-          H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL); 
-          H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE); 
+          H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL);
+          H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);
 #endif
           hdfp.reset(new H5::H5File(const_file, H5F_ACC_TRUNC
 #if defined(USE_MPI)
             , H5P_DEFAULT, fapl_id
 #endif
           ));
-          
+
           // save selected compile and runtime parameters, the choice depends on the solver family
 //          record_params(*hdfp, typename parent_t::solver_family{});
         }
@@ -117,7 +117,7 @@ namespace libmpdataxx
             shape[0] = this->mem->grid_size[0].length();
             cshape[0] = this->mem->grid_size[0].length();
 
-            if (this->mem->distmem.rank() == this->mem->distmem.size() - 1) 
+            if (this->mem->distmem.rank() == this->mem->distmem.size() - 1)
               cshape[0] += 1;
 
             offst[0] = this->mem->grid_size[0].first();
@@ -173,10 +173,10 @@ namespace libmpdataxx
               dim_space.selectHyperslab(H5S_SELECT_SET, cshape.data(), offst.data());
               curr_dim.write(coord.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, cshape.data()), dim_space, dxpl_id);
             }
-            
+
             // T
             {
-              const hsize_t 
+              const hsize_t
                 nt_out = nt / this->outfreq + 1; // incl. t=0
               float dt = this->dt;
 
@@ -189,14 +189,14 @@ namespace libmpdataxx
               dim_space.selectHyperslab(H5S_SELECT_SET, &nt_out, &zero);
               curr_dim.write(coord.data(), flttype_solver, H5::DataSpace(1, &nt_out), dim_space, dxpl_id);
             }
-            
+
             // G factor
             if (this->mem->G.get() != nullptr)
             {
               auto g_set = (*hdfp).createDataSet("G", flttype_output, sspace);
               record_dsc_helper(g_set, *this->mem->G);
             }
-            
+
             // save selected compile and runtime parameters, the choice depends on the solver family
             record_params(*hdfp, typename parent_t::solver_family{});
           }
@@ -209,7 +209,7 @@ namespace libmpdataxx
         ss << "timestep" << std::setw(10) << std::setfill('0') << this->timestep;
         return ss.str();
       }
-      
+
       std::string hdf_name()
       {
         // TODO: add option of .nc extension for Paraview sake ?
@@ -266,7 +266,7 @@ namespace libmpdataxx
           }
           case 2:
           {
-            typename solver_t::arr_t contiguous_arr(this->mem->grid_size[0], zro); 
+            typename solver_t::arr_t contiguous_arr(this->mem->grid_size[0], zro);
             contiguous_arr = arr(this->mem->grid_size[0], zro); // create a copy that is contiguous
             dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, srfcshape.data()), space, dxpl_id);
             break;
@@ -281,7 +281,7 @@ namespace libmpdataxx
           default: assert(false);
         };
       }
-      
+
       void record_dsc_helper(const H5::DataSet &dset, const typename solver_t::arr_t &arr)
       {
         H5::DataSpace space = dset.getSpace();
@@ -333,7 +333,7 @@ namespace libmpdataxx
       {
         record_aux_hlpr(name, data, *hdfp);
       }
-      
+
       // for discontiguous array with halos
       void record_aux_dsc_hlpr(const std::string &name, const typename solver_t::arr_t &arr, H5::H5File hdf, bool srfc = false)
       {
@@ -341,14 +341,14 @@ namespace libmpdataxx
 
         if(srfc)
           params.setChunk(parent_t::n_dims, srfcchunk.data());
-        
+
         auto aux = hdf.createDataSet(
           name,
           flttype_output,
           srfc ? srfcspace : sspace,
           params
         );
- 
+
         if(srfc)
         {
           // revert to default chunk
@@ -421,12 +421,12 @@ namespace libmpdataxx
       {
         record_aux_scalar(name, "/", data);
       }
-      
+
       // see above, also assumes that z is the last dimension
       void record_prof_const(const std::string &name, typename solver_t::real_t *data)
       {
         assert(this->rank == 0);
-        
+
         H5::H5File hdfcp(const_file, H5F_ACC_RDWR
 #if defined(USE_MPI)
           , H5P_DEFAULT, fapl_id
@@ -485,7 +485,7 @@ namespace libmpdataxx
           group.createAttribute("n_iters", type, H5::DataSpace(1, &one)).write(type, &data);
         }
       }
-      
+
       // as above but for solvers with rhs
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_family_tag)
       {
@@ -497,24 +497,24 @@ namespace libmpdataxx
         const auto type = H5::StrType(H5::PredType::C_S1, scheme_str.size());
         group.createAttribute("rhs_scheme", type, H5::DataSpace(1, &one)).write(type, scheme_str.data());
       }
-      
+
       // as above but for solvers with velocities
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_vip_family_tag)
       {
         record_params(hdfcp, typename solvers::mpdata_rhs_family_tag{});
-        
+
         hdfcp.createGroup("vip");
         const auto &group = hdfcp.openGroup("vip");
         const auto vab_str = solvers::vab2string.at(static_cast<solvers::vip_vab_t>(parent_t::ct_params_t_::vip_vab));
         const auto type = H5::StrType(H5::PredType::C_S1, vab_str.size());
         group.createAttribute("vip_abs", type, H5::DataSpace(1, &one)).write(type, vab_str.data());
       }
-      
+
       // as above but for solvers with pressure equation
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_vip_prs_family_tag)
       {
         record_params(hdfcp, typename solvers::mpdata_rhs_vip_family_tag{});
-        
+
         hdfcp.createGroup("prs");
         const auto &group = hdfcp.openGroup("prs");
         {
@@ -527,12 +527,12 @@ namespace libmpdataxx
           group.createAttribute("prs_tol", type, H5::DataSpace(1, &one)).write(type, &this->prs_tol);
         }
       }
-      
+
       // as above but for solvers with subgrid model (parameters common to all subgrid models)
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_family_tag)
       {
         record_params(hdfcp, typename solvers::mpdata_rhs_vip_prs_family_tag{});
-        
+
         hdfcp.createGroup("sgs");
         const auto &group = hdfcp.openGroup("sgs");
         {
@@ -550,24 +550,24 @@ namespace libmpdataxx
           group.createAttribute("cdrag", type, H5::DataSpace(1, &one)).write(type, &this->cdrag);
         }
       }
-      
+
       // as above but for solvers with the dns subgrid model
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_dns_family_tag)
       {
         record_params(hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_family_tag{});
-        
+
         const auto &group = hdfcp.openGroup("sgs");
         {
           const auto type = flttype_solver;
           group.createAttribute("eta", type, H5::DataSpace(1, &one)).write(type, &this->eta);
         }
       }
-      
+
       // as above but for solvers with the smg subgrid model
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_smg_family_tag)
       {
         record_params(hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_family_tag{});
-        
+
         const auto &group = hdfcp.openGroup("sgs");
         {
           const auto type = flttype_solver;
@@ -575,7 +575,7 @@ namespace libmpdataxx
           group.createAttribute("c_m", type, H5::DataSpace(1, &one)).write(type, &this->c_m);
         }
       }
-      
+
       // as above but for the boussinesq solver
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_boussinesq_family_tag)
       {
@@ -601,7 +601,7 @@ namespace libmpdataxx
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_boussinesq_sgs_family_tag)
       {
         record_params(hdfcp, typename solvers::mpdata_boussinesq_family_tag{});
-        
+
         const auto &group = hdfcp.openGroup("boussinesq");
         {
           const auto type = flttype_solver;
diff --git a/libmpdata++/output/hdf5_xdmf.hpp b/libmpdata++/output/hdf5_xdmf.hpp
index 96ac1547..91425fce 100644
--- a/libmpdata++/output/hdf5_xdmf.hpp
+++ b/libmpdata++/output/hdf5_xdmf.hpp
@@ -29,12 +29,12 @@ namespace libmpdataxx
     class hdf5_xdmf : public hdf5<solver_t>
     {
       protected:
-      
+
       using output_t = hdf5_xdmf<solver_t>;
       using parent_t = hdf5<solver_t>;
-      
+
       static_assert(parent_t::n_dims > 1, "only 2D and 3D output supported");
-      
+
       std::vector<std::string> timesteps;
       //xdmf writer
       detail::xdmf_writer<parent_t::n_dims> xdmfw;
@@ -53,7 +53,7 @@ namespace libmpdataxx
           {
             attr_names.push_back(v.second.name);
           }
-          
+
           if (this->mem->G.get() != nullptr) xdmfw.add_const_attribute("G", this->const_name, this->mem->distmem.grid_size.data());
 
           xdmfw.setup(this->const_name, this->dim_names, attr_names, this->mem->distmem.grid_size.data());
@@ -88,10 +88,10 @@ namespace libmpdataxx
 #if defined(USE_MPI)
         if (this->mem->distmem.rank() == 0)
 #endif
-          xdmfw.add_attribute(name, this->hdf_name(), this->mem->distmem.grid_size.data()); 
+          xdmfw.add_attribute(name, this->hdf_name(), this->mem->distmem.grid_size.data());
         parent_t::record_aux(name, data);
       }
-      
+
       void record_aux_dsc(const std::string &name, const typename solver_t::arr_t &arr, bool srfc = false)
       {
         auto shape = this->mem->distmem.grid_size;
@@ -99,7 +99,7 @@ namespace libmpdataxx
 #if defined(USE_MPI)
         if (this->mem->distmem.rank() == 0)
 #endif
-          xdmfw.add_attribute(name, this->hdf_name(), shape.data()); 
+          xdmfw.add_attribute(name, this->hdf_name(), shape.data());
         parent_t::record_aux_dsc(name, arr, srfc);
       }
 
diff --git a/libmpdata++/solvers/boussinesq.hpp b/libmpdata++/solvers/boussinesq.hpp
index 01fc3da8..ee7919c9 100644
--- a/libmpdata++/solvers/boussinesq.hpp
+++ b/libmpdata++/solvers/boussinesq.hpp
@@ -35,7 +35,7 @@ namespace libmpdataxx
                                                                   mpdata_boussinesq_family_tag,
                                                                   mpdata_boussinesq_sgs_family_tag>::type;
     };
-    
+
     template<typename ct_params_t>
     class boussinesq<
       ct_params_t,
@@ -44,7 +44,7 @@ namespace libmpdataxx
     {
       using parent_t = detail::boussinesq_impl<ct_params_t>;
       using parent_t::parent_t; // inheriting constructors
-      
+
       protected:
       using solver_family = typename std::conditional<static_cast<sgs_scheme_t>(ct_params_t::sgs_scheme) == iles,
                                                       mpdata_boussinesq_family_tag,
diff --git a/libmpdata++/solvers/detail/boussinesq_common.hpp b/libmpdata++/solvers/detail/boussinesq_common.hpp
index 6424e222..c7c1a6c0 100644
--- a/libmpdata++/solvers/detail/boussinesq_common.hpp
+++ b/libmpdata++/solvers/detail/boussinesq_common.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -29,14 +29,14 @@ namespace libmpdataxx
         virtual void calc_full_tht(typename parent_t::arr_t&) = 0;
 
         public:
-        struct rt_params_t : parent_t::rt_params_t 
-        { 
+        struct rt_params_t : parent_t::rt_params_t
+        {
           real_t g = 9.81, Tht_ref = 0, hflux_const = 0;
         };
 
         // ctor
-        boussinesq_common( 
-          typename parent_t::ctor_args_t args, 
+        boussinesq_common(
+          typename parent_t::ctor_args_t args,
           const rt_params_t &p
         ) :
           parent_t(args, p),
diff --git a/libmpdata++/solvers/detail/boussinesq_expl.hpp b/libmpdata++/solvers/detail/boussinesq_expl.hpp
index 48b68966..ab7caede 100644
--- a/libmpdata++/solvers/detail/boussinesq_expl.hpp
+++ b/libmpdata++/solvers/detail/boussinesq_expl.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -30,8 +30,8 @@ namespace libmpdataxx
         using ix = typename ct_params_t::ix;
         bool buoy_filter;
         typename parent_t::arr_t &tmp1, &tmp2;
-        
-        template <int nd = ct_params_t::n_dims> 
+
+        template <int nd = ct_params_t::n_dims>
         void filter(typename std::enable_if<nd == 2>::type* = 0)
         {
           const auto &i(this->i), &j(this->j);
@@ -39,14 +39,14 @@ namespace libmpdataxx
           tmp2(i, j) = real_t(0.25) * (tmp1(i, j + 1) + 2 * tmp1(i, j) + tmp1(i, j - 1));
         }
 
-        template <int nd = ct_params_t::n_dims> 
+        template <int nd = ct_params_t::n_dims>
         void filter(typename std::enable_if<nd == 3>::type* = 0)
         {
           const auto &i(this->i), &j(this->j), &k(this->k);
           this->xchng_sclr(tmp1, this->ijk);
           tmp2(i, j, k) = real_t(0.25) * (tmp1(i, j, k + 1) + 2 * tmp1(i, j, k) + tmp1(i, j, k - 1));
         }
-        
+
         // helpers for buoyancy forces
         template<class ijk_t>
         inline auto buoy_at_0(const ijk_t &ijk)
@@ -55,7 +55,7 @@ namespace libmpdataxx
             this->g * (this->state(ix::tht)(ijk) - this->tht_e(ijk)) / this->Tht_ref
           );
         }
-        
+
         template<class ijk_t>
         inline auto buoy_at_1(const ijk_t &ijk)
         {
@@ -74,17 +74,17 @@ namespace libmpdataxx
           full_tht(this->ijk) = this->state(ix::tht)(this->ijk);
         }
 
-        // explicit forcings 
+        // explicit forcings
         void update_rhs(
           libmpdataxx::arrvec_t<
             typename parent_t::arr_t
-          > &rhs, 
-          const real_t &dt, 
-          const int &at 
+          > &rhs,
+          const real_t &dt,
+          const int &at
         ) {
-          parent_t::update_rhs(rhs, dt, at); 
+          parent_t::update_rhs(rhs, dt, at);
 
-          const auto &tht = this->state(ix::tht); 
+          const auto &tht = this->state(ix::tht);
           const auto &ijk = this->ijk;
 
           auto ix_w = this->vip_ixs[ct_params_t::n_dims - 1];
@@ -94,7 +94,7 @@ namespace libmpdataxx
             case (0):
             {
               rhs.at(ix::tht)(ijk) += this->hflux_frc(ijk) - this->tht_abs(ijk) * (tht(ijk) - this->tht_e(ijk));
-              
+
               if (!buoy_filter)
               {
                 rhs.at(ix_w)(ijk) += buoy_at_0(ijk);
@@ -128,16 +128,16 @@ namespace libmpdataxx
             }
           }
         }
-        
+
         public:
-        struct rt_params_t : parent_t::rt_params_t 
-        { 
-          bool buoy_filter = false; 
+        struct rt_params_t : parent_t::rt_params_t
+        {
+          bool buoy_filter = false;
         };
 
         // ctor
-        boussinesq_expl( 
-          typename parent_t::ctor_args_t args, 
+        boussinesq_expl(
+          typename parent_t::ctor_args_t args,
           const rt_params_t &p
         ) :
           parent_t(args, p),
diff --git a/libmpdata++/solvers/detail/boussinesq_impl.hpp b/libmpdata++/solvers/detail/boussinesq_impl.hpp
index 0f47e8a7..e2617be8 100644
--- a/libmpdata++/solvers/detail/boussinesq_impl.hpp
+++ b/libmpdata++/solvers/detail/boussinesq_impl.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -29,14 +29,14 @@ namespace libmpdataxx
         // member fields
         using ix = typename ct_params_t::ix;
         typename parent_t::arr_t &dtht_e;
-        
+
         template <int nd = ct_params_t::n_dims>
         void calc_dtht_e(typename std::enable_if<nd == 2>::type* = 0)
         {
           this->xchng_sclr(this->tht_e, this->ijk);
           this->dtht_e(this->ijk) = formulae::nabla::grad<1>(this->tht_e, this->j, this->i, this->dj);
         }
-        
+
         template <int nd = ct_params_t::n_dims>
         void calc_dtht_e(typename std::enable_if<nd == 3>::type* = 0)
         {
@@ -49,8 +49,8 @@ namespace libmpdataxx
           full_tht(this->ijk) = this->state(ix::tht)(this->ijk) + this->tht_e(this->ijk);
         }
 
-        void hook_ante_loop(const typename parent_t::advance_arg_t nt) 
-        {   
+        void hook_ante_loop(const typename parent_t::advance_arg_t nt)
+        {
           calc_dtht_e();
           parent_t::hook_ante_loop(nt);
         }
@@ -75,19 +75,19 @@ namespace libmpdataxx
                  / (1 + real_t(0.5) * this->dt * this->tht_abs(this->ijk)));
           }
         }
-        
+
         void update_rhs(
           libmpdataxx::arrvec_t<
             typename parent_t::arr_t
-          > &rhs, 
-          const real_t &dt, 
-          const int &at 
+          > &rhs,
+          const real_t &dt,
+          const int &at
         ) {
           parent_t::update_rhs(rhs, dt, at);
 
           auto ix_w = this->vip_ixs[ct_params_t::n_dims - 1];
 
-          const auto &tht = this->state(ix::tht); 
+          const auto &tht = this->state(ix::tht);
           const auto &w = this->state(ix_w);
           const auto &ijk = this->ijk;
 
@@ -110,23 +110,23 @@ namespace libmpdataxx
             }
           }
         }
-        
+
         void vip_rhs_impl_fnlz()
         {
           parent_t::vip_rhs_impl_fnlz();
-          
+
           const auto &w = this->vips()[ct_params_t::n_dims - 1];
-          this->state(ix::tht)(this->ijk) = ( this->state(ix::tht)(this->ijk) 
+          this->state(ix::tht)(this->ijk) = ( this->state(ix::tht)(this->ijk)
                                             - real_t(0.5) * this->dt * w(this->ijk) * this->dtht_e(this->ijk))
                                             / (1 + real_t(0.5) * this->dt * this->tht_abs(this->ijk));
           this->rhs.at(ix::tht)(this->ijk) += -w(this->ijk) * this->dtht_e(this->ijk)
                                               -this->tht_abs(this->ijk) * this->state(ix::tht)(this->ijk);
         }
-        
+
         public:
         // ctor
-        boussinesq_impl( 
-          typename parent_t::ctor_args_t args, 
+        boussinesq_impl(
+          typename parent_t::ctor_args_t args,
           const typename parent_t::rt_params_t &p
         ) :
           parent_t(args, p),
diff --git a/libmpdata++/solvers/detail/boussinesq_sgs_common.hpp b/libmpdata++/solvers/detail/boussinesq_sgs_common.hpp
index 74a40b6d..51c7317b 100644
--- a/libmpdata++/solvers/detail/boussinesq_sgs_common.hpp
+++ b/libmpdata++/solvers/detail/boussinesq_sgs_common.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -27,7 +27,7 @@ namespace libmpdataxx
         typename parent_t::arr_t &rcdsn_num, &full_tht, &tdef_sq, &mix_len, &hflux_srfc;
         arrvec_t<typename parent_t::arr_t> &grad_tht;
 
-        template <int nd = ct_params_t::n_dims> 
+        template <int nd = ct_params_t::n_dims>
         void calc_rcdsn_num(typename std::enable_if<nd == 2>::type* = 0)
         {
           rcdsn_num(this->ijk) = this->g * real_t(0.5) * (
@@ -36,7 +36,7 @@ namespace libmpdataxx
                                            ) / (this->Tht_ref * tdef_sq(this->ijk));
         }
 
-        template <int nd = ct_params_t::n_dims> 
+        template <int nd = ct_params_t::n_dims>
         void calc_rcdsn_num(typename std::enable_if<nd == 3>::type* = 0)
         {
           rcdsn_num(this->ijk) = this->g * real_t(0.5) * (
@@ -69,7 +69,7 @@ namespace libmpdataxx
 
           this->xchng_pres(full_tht, this->ijk);
           formulae::nabla::calc_grad_cmpct<parent_t::n_dims>(grad_tht, this->full_tht, this->ijk, this->ijkm, this->dijk);
-          
+
           tdef_sq(this->ijk) = formulae::stress::calc_tdef_sq_cmpct<ct_params_t::n_dims>(this->tau, this->ijk);
 
           calc_rcdsn_num();
@@ -86,7 +86,7 @@ namespace libmpdataxx
           ijp1.ubound(ct_params_t::n_dims - 1) = 1;
 
           this->k_m(ij) = this->k_m(ijp1);
-          
+
           this->xchng_sclr(this->k_m, this->ijk, 1);
 
           // havo to use modified ijkm due to shared-memory parallelisation, otherwise overlapping ranges
@@ -120,14 +120,14 @@ namespace libmpdataxx
         }
 
         public:
-        struct rt_params_t : parent_t::rt_params_t 
-        { 
+        struct rt_params_t : parent_t::rt_params_t
+        {
           real_t prandtl_num = 0;
         };
 
         // ctor
-        boussinesq_sgs_common( 
-          typename parent_t::ctor_args_t args, 
+        boussinesq_sgs_common(
+          typename parent_t::ctor_args_t args,
           const rt_params_t &p
         ) :
           parent_t(args, p),
diff --git a/libmpdata++/solvers/detail/monitor.hpp b/libmpdata++/solvers/detail/monitor.hpp
index 976d0ed4..0f33a705 100644
--- a/libmpdata++/solvers/detail/monitor.hpp
+++ b/libmpdata++/solvers/detail/monitor.hpp
@@ -30,7 +30,7 @@ namespace libmpdataxx
         // ... 10 11 12 13 14 15
         sprintf(
           &name[std::min(len, 10)], // taking care of short thread-names
-          " %3d%%", 
+          " %3d%%",
           int(frac * 100)
         );
 #if defined(__linux__)
diff --git a/libmpdata++/solvers/detail/mpdata_common.hpp b/libmpdata++/solvers/detail/mpdata_common.hpp
index 65fb9604..e744fb41 100644
--- a/libmpdata++/solvers/detail/mpdata_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_common.hpp
@@ -14,19 +14,19 @@ namespace libmpdataxx
     {
       template <typename ct_params_t, int minhalo>
       class mpdata_common : public detail::solver<
-        ct_params_t, 
-        formulae::mpdata::n_tlev, 
+        ct_params_t,
+        formulae::mpdata::n_tlev,
         detail::max(minhalo, formulae::mpdata::halo(ct_params_t::opts))
       >
       {
         using parent_t = detail::solver<
-          ct_params_t, 
-          formulae::mpdata::n_tlev, 
+          ct_params_t,
+          formulae::mpdata::n_tlev,
           detail::max(minhalo, formulae::mpdata::halo(ct_params_t::opts))
         >;
 
         using GC_t = arrvec_t<typename parent_t::arr_t>;
- 
+
         protected:
 
         // static constants
@@ -38,13 +38,13 @@ namespace libmpdataxx
 
         // methods
         GC_t &GC_unco(int iter)
-        {   
-          return (iter == 1)  
-            ? this->mem->GC 
-            : (iter % 2)  
+        {
+          return (iter == 1)
+            ? this->mem->GC
+            : (iter % 2)
               ? *tmp[1]  // odd iters
               : *tmp[0]; // even iters
-        }   
+        }
 
         GC_t &GC_corr(int iter)
         {
@@ -59,22 +59,22 @@ namespace libmpdataxx
           return GC_corr(iter);
         }
 
-        // for Flux-Corrected Transport 
+        // for Flux-Corrected Transport
         virtual void fct_init(int e) { }
         virtual void fct_adjust_antidiff(int e, int iter) { }
 
-        //  
+        //
         static int n_tmp(const int &n_iters)
         {
-          return n_iters > 2 ? 2 : 1; 
+          return n_iters > 2 ? 2 : 1;
         }
 
         public:
 
         struct rt_params_t : parent_t::rt_params_t
         {
-          int n_iters = 2; 
-          int upwind_filter_freq = 0; 
+          int n_iters = 2;
+          int upwind_filter_freq = 0;
         };
 
         protected:
@@ -83,7 +83,7 @@ namespace libmpdataxx
         mpdata_common(
           typename parent_t::ctor_args_t args,
           const rt_params_t &p
-        ) : 
+        ) :
           parent_t(args, p),
           n_iters(p.n_iters),
           upwind_filter_freq(p.upwind_filter_freq),
@@ -100,18 +100,18 @@ namespace libmpdataxx
 
         // memory allocation
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
-        ) {   
+        ) {
           parent_t::alloc(mem, n_iters);
           for (int n = 0; n < n_tmp(n_iters); ++n)
             parent_t::alloc_tmp_vctr(mem, __FILE__);
           parent_t::alloc_tmp_vctr(mem, __FILE__); // fluxes
-        }   
+        }
       };
 
       // partial specialisations
-      template<typename ct_params_t, int minhalo, class enableif = void> 
+      template<typename ct_params_t, int minhalo, class enableif = void>
       class mpdata_osc
       {};
     } // namespace detail
diff --git a/libmpdata++/solvers/detail/mpdata_fct_1d.hpp b/libmpdata++/solvers/detail/mpdata_fct_1d.hpp
index 7967e270..38714bb7 100644
--- a/libmpdata++/solvers/detail/mpdata_fct_1d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_fct_1d.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -18,14 +18,14 @@ namespace libmpdataxx
   {
     namespace detail
     {
-      template <typename ct_params_t, int minhalo> 
+      template <typename ct_params_t, int minhalo>
       class mpdata_fct<
-        ct_params_t, 
+        ct_params_t,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 1>::type
-      > : public detail::mpdata_fct_common<ct_params_t, minhalo> 
+      > : public detail::mpdata_fct_common<ct_params_t, minhalo>
       {
-        using parent_t = detail::mpdata_fct_common<ct_params_t, minhalo>; 
+        using parent_t = detail::mpdata_fct_common<ct_params_t, minhalo>;
         using parent_t::parent_t; // inheriting constructors
 
         void fct_init(int e)
@@ -33,11 +33,11 @@ namespace libmpdataxx
           const auto i1 = this->i^1; // TODO: isn't it a race condition with more than one thread?
           const auto psi = this->mem->psi[e][this->n[e]];
 
-          /// \f$ \psi^{max}_{i}=max_{I}(\psi^{n}_{i-1},\psi^{n}_{i},\psi^{n}_{i+1},\psi^{*}_{i-1},\psi^{*}_{i},\psi^{*}_{i+1}) \f$ \n  
-          /// \f$ \psi^{min}_{i}=min_{I}(\psi^{n}_{i-1},\psi^{n}_{i},\psi^{n}_{i+1},\psi^{*}_{i-1},\psi^{*}_{i},\psi^{*}_{i+1}) \f$ \n    
+          /// \f$ \psi^{max}_{i}=max_{I}(\psi^{n}_{i-1},\psi^{n}_{i},\psi^{n}_{i+1},\psi^{*}_{i-1},\psi^{*}_{i},\psi^{*}_{i+1}) \f$ \n
+          /// \f$ \psi^{min}_{i}=min_{I}(\psi^{n}_{i-1},\psi^{n}_{i},\psi^{n}_{i+1},\psi^{*}_{i-1},\psi^{*}_{i},\psi^{*}_{i+1}) \f$ \n
           /// eq.(20a, 20b) in Smolarkiewicz & Grabowski 1990 (J.Comp.Phys.,86,355-375)
           this->psi_min(i1) = min(min(psi(i1-1), psi(i1)), psi(i1+1));
-          this->psi_max(i1) = max(max(psi(i1-1), psi(i1)), psi(i1+1)); 
+          this->psi_max(i1) = max(max(psi(i1-1), psi(i1)), psi(i1+1));
         }
 
         void fct_adjust_antidiff(int e, int iter)
@@ -47,8 +47,8 @@ namespace libmpdataxx
           auto &GC_corr = parent_t::GC_corr(iter);
           const auto &G = *this->mem->G;
           const auto &im = this->im; // calculating once for i-1/2 and i+1/2
-          const auto i1 = this->i^1; 
-          const auto im1 = this->im^1; 
+          const auto i1 = this->i^1;
+          const auto im1 = this->im^1;
 
           // fill halos in GC_corr
           this->xchng_vctr_alng(GC_corr, true);
@@ -72,7 +72,7 @@ namespace libmpdataxx
           // calculating betas
           formulae::mpdata::beta_up<ct_params_t::opts>(this->beta_up, psi, this->psi_max, flx, G, i1);
           formulae::mpdata::beta_dn<ct_params_t::opts>(this->beta_dn, psi, this->psi_min, flx, G, i1);
-          
+
           // assuring flx, psi_min and psi_max are not overwritten
           this->beta_barrier(iter);
 
diff --git a/libmpdata++/solvers/detail/mpdata_fct_2d.hpp b/libmpdata++/solvers/detail/mpdata_fct_2d.hpp
index 9c0e2b70..fb4b61ac 100644
--- a/libmpdata++/solvers/detail/mpdata_fct_2d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_fct_2d.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -18,20 +18,20 @@ namespace libmpdataxx
   {
     namespace detail
     {
-      template <typename ct_params_t, int minhalo> 
+      template <typename ct_params_t, int minhalo>
       class mpdata_fct<
-        ct_params_t, 
+        ct_params_t,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 2>::type
-      > : public detail::mpdata_fct_common<ct_params_t, minhalo> 
+      > : public detail::mpdata_fct_common<ct_params_t, minhalo>
       {
-        using parent_t = detail::mpdata_fct_common<ct_params_t, minhalo>; 
+        using parent_t = detail::mpdata_fct_common<ct_params_t, minhalo>;
         using parent_t::parent_t; // inheriting ctors
 
         void fct_init(int e)
         {
           const auto i1 = this->i^1, j1 = this->j^1; // not optimal - with multiple threads some indices are repeated among threads
-          const auto psi = this->mem->psi[e][this->n[e]]; 
+          const auto psi = this->mem->psi[e][this->n[e]];
 
           this->psi_min(i1,j1) = min(min(min(min(
                            psi(i1,j1+1),
@@ -40,9 +40,9 @@ namespace libmpdataxx
           );
           this->psi_max(i1,j1) = max(max(max(max(
                            psi(i1,j1+1),
-            psi(i1-1,j1)), psi(i1,j1  )), psi(i1+1,j1)), 
+            psi(i1-1,j1)), psi(i1,j1  )), psi(i1+1,j1)),
                            psi(i1,j1-1)
-          ); 
+          );
         }
 
         void fct_adjust_antidiff(int e, int iter)
@@ -50,8 +50,8 @@ namespace libmpdataxx
           const auto psi = this->mem->psi[e][this->n[e]];
           auto &GC_corr = parent_t::GC_corr(iter);
           const auto &G = *this->mem->G;
-          const auto i1 = this->i^1, j1 = this->j^1; 
-          const auto im1 = this->im^1, jm1 = this->jm^1; 
+          const auto i1 = this->i^1, j1 = this->j^1;
+          const auto im1 = this->im^1, jm1 = this->jm^1;
           const auto &im(this->im), &jm(this->jm); // calculating once for (i/j)-1/2 and (i/j)+1/2
 
           // fill halos of GC_corr -> mpdata works with halo=1, we need halo=2
diff --git a/libmpdata++/solvers/detail/mpdata_fct_3d.hpp b/libmpdata++/solvers/detail/mpdata_fct_3d.hpp
index 3cfc0583..180e9270 100644
--- a/libmpdata++/solvers/detail/mpdata_fct_3d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_fct_3d.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -18,20 +18,20 @@ namespace libmpdataxx
   {
     namespace detail
     {
-      template <typename ct_params_t, int minhalo> 
+      template <typename ct_params_t, int minhalo>
       class mpdata_fct<
-        ct_params_t, 
+        ct_params_t,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 3>::type
-      > : public detail::mpdata_fct_common<ct_params_t, minhalo> 
+      > : public detail::mpdata_fct_common<ct_params_t, minhalo>
       {
-        using parent_t = detail::mpdata_fct_common<ct_params_t, minhalo>; 
+        using parent_t = detail::mpdata_fct_common<ct_params_t, minhalo>;
         using parent_t::parent_t; // inheriting ctors
 
         void fct_init(int e)
         {
           const auto i1 = this->i^1, j1 = this->j^1, k1 = this->k^1; // not optimal - with multiple threads some indices are repeated among threads
-          const auto psi = this->mem->psi[e][this->n[e]]; 
+          const auto psi = this->mem->psi[e][this->n[e]];
 
           this->psi_min(i1,j1,k1) = min(min(min(min(min(min(
                         psi(i1,  j1,  k1),
@@ -42,16 +42,16 @@ namespace libmpdataxx
                         psi(i1,  j1,  k1+1)),
                         psi(i1,  j1,  k1-1)
           );
-                        
+
           this->psi_max(i1,j1,k1) = max(max(max(max(max(max(
                         psi(i1,  j1,  k1),
-                        psi(i1+1,j1,  k1)), 
+                        psi(i1+1,j1,  k1)),
                         psi(i1-1,j1,  k1)),
                         psi(i1,  j1+1,k1)),
                         psi(i1,  j1-1,k1)),
-                        psi(i1,  j1,  k1+1)), 
-                        psi(i1,  j1,  k1-1) 
-          ); 
+                        psi(i1,  j1,  k1+1)),
+                        psi(i1,  j1,  k1-1)
+          );
         }
 
         void fct_adjust_antidiff(int e, int iter)
@@ -62,15 +62,15 @@ namespace libmpdataxx
           const auto &im(this->im), &jm(this->jm), &km(this->km); // calculating once for (i/j/k)-1/2 and (i/j/k)+1/2
 
           // not optimal - with multiple threads some indices are repeated among threads
-          const auto 
+          const auto
             i = this->i, j = this->j, k = this->k,
             i1 = i^1, j1 = j^1, k1 = k^1,
-            im1 = this->im^1, jm1 = this->jm^1, km1 = this->km^1; 
+            im1 = this->im^1, jm1 = this->jm^1, km1 = this->km^1;
 
           // fill halos -> mpdata works with halo=1, we need halo=2
           this->xchng_vctr_alng(GC_corr, true);
           this->xchng_vctr_nrml(this->GC_corr(iter), this->ijk);
-          
+
           // calculation of fluxes for betas denominators
           if (opts::isset(ct_params_t::opts, opts::iga))
           {
@@ -90,7 +90,7 @@ namespace libmpdataxx
           // calculating betas
           formulae::mpdata::beta_up<ct_params_t::opts>(this->beta_up, psi, this->psi_max, flx, G, i1, j1, k1);
           formulae::mpdata::beta_dn<ct_params_t::opts>(this->beta_dn, psi, this->psi_min, flx, G, i1, j1, k1);
-        
+
 
           // should detect the need for ext=1 in hallo-filling above
           assert(std::isfinite(sum(this->beta_up(i1, j, k))));
diff --git a/libmpdata++/solvers/detail/mpdata_fct_common.hpp b/libmpdata++/solvers/detail/mpdata_fct_common.hpp
index d56a6294..6622e205 100644
--- a/libmpdata++/solvers/detail/mpdata_fct_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_fct_common.hpp
@@ -26,10 +26,10 @@ namespace libmpdataxx
         protected:
 
         // member fields
-        typename parent_t::arr_t psi_min, psi_max, beta_up, beta_dn; 
-        arrvec_t<typename parent_t::arr_t> GC_mono; 
+        typename parent_t::arr_t psi_min, psi_max, beta_up, beta_dn;
+        arrvec_t<typename parent_t::arr_t> GC_mono;
 
-        arrvec_t<typename parent_t::arr_t> &GC(int iter) 
+        arrvec_t<typename parent_t::arr_t> &GC(int iter)
         {
           if (iter > 0) return GC_mono;
           return parent_t::GC(iter);
@@ -47,7 +47,7 @@ namespace libmpdataxx
         mpdata_fct_common(
           typename parent_t::ctor_args_t args,
           const typename parent_t::rt_params_t &p
-        ) : 
+        ) :
           parent_t(args, p),
           psi_min(args.mem->tmp[__FILE__][0][0]),
           psi_max(args.mem->tmp[__FILE__][0][1]),
@@ -57,7 +57,7 @@ namespace libmpdataxx
         {}
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
@@ -68,9 +68,9 @@ namespace libmpdataxx
       };
 
       // partial specialisations
-      template<typename ct_params_t, int minhalo, class enableif = void> 
+      template<typename ct_params_t, int minhalo, class enableif = void>
       class mpdata_fct
-      {}; 
+      {};
     } // namespace detail
   } // namespace solvers
 } // namescpae libmpdataxx
diff --git a/libmpdata++/solvers/detail/mpdata_osc_1d.hpp b/libmpdata++/solvers/detail/mpdata_osc_1d.hpp
index 9da27080..ff74c3e4 100644
--- a/libmpdata++/solvers/detail/mpdata_osc_1d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_osc_1d.hpp
@@ -23,7 +23,7 @@ namespace libmpdataxx
 
       template<typename ct_params_t, int minhalo>
       class mpdata_osc<
-        ct_params_t, 
+        ct_params_t,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 1>::type
       > : public detail::mpdata_common<ct_params_t, minhalo>
@@ -39,14 +39,14 @@ namespace libmpdataxx
   //  note that it's not needed for upstream
           parent_t::hook_ante_loop(nt);
           if (opts::isset(ct_params_t::opts, opts::nug))
-            this->xchng_sclr(*this->mem->G); 
+            this->xchng_sclr(*this->mem->G);
 
           // set time derivatives of GC to zero
           // needed for stationary flows prescribed using the advector method
           if (opts::isset(ct_params_t::opts, opts::div_3rd_dt) || opts::isset(ct_params_t::opts, opts::div_3rd))
           {
             this->mem->ndt_GC[0](this->im + h) = 0;
-            
+
             this->mem->ndtt_GC[0](this->im + h) = 0;
 
             this->xchng_vctr_alng(this->mem->ndt_GC);
@@ -59,19 +59,19 @@ namespace libmpdataxx
         {
           this->fct_init(e); // e.g. store psi_min, psi_max in FCT
 
-          for (int iter = 0; iter < this->n_iters; ++iter) 
+          for (int iter = 0; iter < this->n_iters; ++iter)
           {
-            if (iter != 0) 
+            if (iter != 0)
             {
               this->cycle(e); // cycles subdomain's "n", and global "n" if it's the last equation
               this->xchng(e);
 
-              // calculating the antidiffusive C 
+              // calculating the antidiffusive C
               formulae::mpdata::antidiff<ct_params_t::opts,
                                          static_cast<sptl_intrp_t>(ct_params_t::sptl_intrp),
                                          static_cast<tmprl_extrp_t>(ct_params_t::tmprl_extrp)>(
                 this->GC_corr(iter)[0],
-                this->mem->psi[e][this->n[e]], 
+                this->mem->psi[e][this->n[e]],
                 this->GC_unco(iter),
                 this->mem->ndt_GC,
                 this->mem->ndtt_GC,
@@ -96,7 +96,7 @@ namespace libmpdataxx
             {
               this->flux[0](im+h) = formulae::donorcell::make_flux<ct_params_t::opts>(
                 this->mem->psi[e][this->n[e]],
-                this->GC(iter)[0], 
+                this->GC(iter)[0],
                 im
               );
               this->flux_ptr = &this->flux; // TODO: if !iga this is needed only once per simulation, TODO: move to common
@@ -108,7 +108,7 @@ namespace libmpdataxx
             }
 
             // sanity checks for input // TODO: move to common
-            //assert(std::isfinite(sum(psi[this->n[e]](this->ijk)))); 
+            //assert(std::isfinite(sum(psi[this->n[e]](this->ijk))));
             //assert(std::isfinite(sum(flux_ref[0](i^h))));
 
             // donor-cell call // TODO: could be made common for 1D/2D/3D
@@ -127,7 +127,7 @@ namespace libmpdataxx
               break;
             }
             // sanity checks for output // TODO: move to common
-            //assert(std::isfinite(sum(psi[this->n[e]+1](this->ijk)))); 
+            //assert(std::isfinite(sum(psi[this->n[e]+1](this->ijk))));
           }
         }
 
@@ -139,17 +139,17 @@ namespace libmpdataxx
           const auto &i(this->i);
           auto &GC(this->mem->GC);
           using namespace formulae::donorcell;
- 
+
           this->xchng_sclr(field, this->ijk);
- 
+
           // calculation of fluxes
           this->flux[0](im+h) = make_flux<ct_params_t::opts>(field, GC[0], im);
- 
+
           // sanity check for input
           assert(std::isfinite(sum(field(i))));
           assert(std::isfinite(sum(this->flux[0](i^h))));
- 
-          // donor-cell call 
+
+          // donor-cell call
           donorcell_sum<ct_params_t::opts>(
             this->mem->khn_tmp,
             i,
@@ -171,7 +171,7 @@ namespace libmpdataxx
         mpdata_osc(
           typename parent_t::ctor_args_t args,
           const typename parent_t::rt_params_t &p
-        ) : 
+        ) :
           parent_t(args, p),
           im(args.i.first() - 1, args.i.last())
         {}
diff --git a/libmpdata++/solvers/detail/mpdata_osc_2d.hpp b/libmpdata++/solvers/detail/mpdata_osc_2d.hpp
index cb121c58..f86e9927 100644
--- a/libmpdata++/solvers/detail/mpdata_osc_2d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_osc_2d.hpp
@@ -25,7 +25,7 @@ namespace libmpdataxx
 
       template<typename ct_params_t, int minhalo>
       class mpdata_osc<
-        ct_params_t, 
+        ct_params_t,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 2>::type
       > : public detail::mpdata_common<ct_params_t, minhalo>
@@ -38,12 +38,12 @@ namespace libmpdataxx
         const rng_t im, jm;
 
         void hook_ante_loop(const typename parent_t::advance_arg_t nt)
-        {   
+        {
           //  note that it's not needed for upstream
           parent_t::hook_ante_loop(nt);
           if (opts::isset(ct_params_t::opts, opts::nug))
             this->xchng_sclr(*this->mem->G, this->ijk, this->halo);
-          
+
           // filling Y halos for GC_x, and X halos for GC_y
           auto ex = this->halo - 1;
           this->xchng_vctr_nrml(this->mem->GC, this->ijk, ex);
@@ -54,7 +54,7 @@ namespace libmpdataxx
           {
             this->mem->ndt_GC[0](this->im + h, this->j) = 0;
             this->mem->ndt_GC[1](this->i, this->jm + h) = 0;
-            
+
             this->mem->ndtt_GC[0](this->im + h, this->j) = 0;
             this->mem->ndtt_GC[1](this->i, this->jm + h) = 0;
 
@@ -64,32 +64,32 @@ namespace libmpdataxx
             this->xchng_vctr_nrml(this->mem->ndt_GC, this->ijk, ex);
             this->xchng_vctr_nrml(this->mem->ndtt_GC, this->ijk, ex);
           }
-        } 
+        }
 
         // method invoked by the solver
         void advop(int e)
         {
           this->fct_init(e);
 
-          for (int iter = 0; iter < this->n_iters; ++iter) 
+          for (int iter = 0; iter < this->n_iters; ++iter)
           {
             if (iter != 0)
             {
               this->cycle(e);
               this->xchng(e);
 
-              // calculating the antidiffusive C 
+              // calculating the antidiffusive C
               formulae::mpdata::antidiff<ct_params_t::opts, 0,
                                          static_cast<sptl_intrp_t>(ct_params_t::sptl_intrp),
                                          static_cast<tmprl_extrp_t>(ct_params_t::tmprl_extrp)>(
                 this->GC_corr(iter)[0],
-                this->mem->psi[e][this->n[e]], 
+                this->mem->psi[e][this->n[e]],
                 this->mem->psi[e][this->n[e]-1],
                 this->GC_unco(iter),
                 this->mem->ndt_GC,
                 this->mem->ndtt_GC,
                 *this->mem->G,
-                this->im, 
+                this->im,
                 this->j
               );
               assert(std::isfinite(sum(this->GC_corr(iter)[0](this->im+h, this->j))));
@@ -98,13 +98,13 @@ namespace libmpdataxx
                                          static_cast<sptl_intrp_t>(ct_params_t::sptl_intrp),
                                          static_cast<tmprl_extrp_t>(ct_params_t::tmprl_extrp)>(
                 this->GC_corr(iter)[1],
-                this->mem->psi[e][this->n[e]], 
+                this->mem->psi[e][this->n[e]],
                 this->mem->psi[e][this->n[e]-1],
                 this->GC_unco(iter),
                 this->mem->ndt_GC,
                 this->mem->ndtt_GC,
                 *this->mem->G,
-                this->jm, 
+                this->jm,
                 this->i
               );
               assert(std::isfinite(sum(this->GC_corr(iter)[1](this->i, this->jm+h))));
@@ -134,22 +134,22 @@ namespace libmpdataxx
             if (!opts::isset(ct_params_t::opts, opts::iga) || iter == 0)
             {
               this->flux[0](im+h, this->j) = formulae::donorcell::make_flux<ct_params_t::opts, 0>(
-                this->mem->psi[e][this->n[e]], 
-                this->GC(iter)[0], 
+                this->mem->psi[e][this->n[e]],
+                this->GC(iter)[0],
                 im, this->j
               );
               this->flux[1](this->i, jm+h) = formulae::donorcell::make_flux<ct_params_t::opts, 1>(
-                this->mem->psi[e][this->n[e]], 
-                this->GC(iter)[1], 
+                this->mem->psi[e][this->n[e]],
+                this->GC(iter)[1],
                 jm, this->i
               );
               this->flux_ptr = &this->flux; // TODO: if !iga this is needed only once per simulation, TODO: move to common
             }
             else
-            {   
+            {
               assert(iter == 1); // infinite gauge option uses just one corrective step // TODO: not true?
               this->flux_ptr = &this->GC(iter);
-            }   
+            }
 
             auto &flx = (*(this->flux_ptr));
             this->xchng_flux(flx);
@@ -159,19 +159,19 @@ namespace libmpdataxx
             //assert(std::isfinite(sum(flx[0](i^h, j  ))));
             //assert(std::isfinite(sum(flx[1](i,   j^h))));
 
-            // donor-cell call 
+            // donor-cell call
             // TODO: doing antidiff,upstream,antidiff,upstream (for each dimension separately) could help optimise memory consumption!
             formulae::donorcell::donorcell_sum<ct_params_t::opts>(
               this->mem->khn_tmp,
               this->ijk,
-              this->mem->psi[e][this->n[e]+1](this->ijk), 
-              this->mem->psi[e][this->n[e]  ](this->ijk), 
+              this->mem->psi[e][this->n[e]+1](this->ijk),
+              this->mem->psi[e][this->n[e]  ](this->ijk),
               flx[0](this->i+h, this->j  ),
               flx[0](this->i-h, this->j  ),
               flx[1](this->i,   this->j+h),
               flx[1](this->i,   this->j-h),
               formulae::G<ct_params_t::opts, 0>(*this->mem->G, this->i, this->j)
-            ); 
+            );
 
             if (this->upwind_filter_freq > 0 && this->timestep % this->upwind_filter_freq == 0)
             {
@@ -191,21 +191,21 @@ namespace libmpdataxx
           const auto &ijk(this->ijk);
           auto &GC(this->mem->GC);
           using namespace formulae::donorcell;
- 
+
           this->xchng_sclr(field, this->ijk);
- 
+
           // calculation of fluxes
           this->flux[0](im+h, j) = make_flux<ct_params_t::opts, 0>(field, GC[0], im, j);
           this->flux[1](i, jm+h) = make_flux<ct_params_t::opts, 1>(field, GC[1], jm, i);
- 
+
           this->xchng_flux(this->flux);
- 
+
           // sanity check for input
           assert(std::isfinite(sum(field(ijk))));
           assert(std::isfinite(sum(this->flux[0](i^h, j  ))));
           assert(std::isfinite(sum(this->flux[1](i,   j^h))));
- 
-          // donor-cell call 
+
+          // donor-cell call
           donorcell_sum<ct_params_t::opts>(
             this->mem->khn_tmp,
             ijk,
@@ -228,7 +228,7 @@ namespace libmpdataxx
         mpdata_osc(
           typename parent_t::ctor_args_t args,
           const typename parent_t::rt_params_t &p
-        ) : 
+        ) :
           parent_t(args, p),
           im(args.i.first() - 1, args.i.last()),
           jm(args.j.first() - 1, args.j.last())
diff --git a/libmpdata++/solvers/detail/mpdata_osc_3d.hpp b/libmpdata++/solvers/detail/mpdata_osc_3d.hpp
index f2190c28..8c7901e8 100644
--- a/libmpdata++/solvers/detail/mpdata_osc_3d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_osc_3d.hpp
@@ -25,7 +25,7 @@ namespace libmpdataxx
 
       template<typename ct_params_t, int minhalo>
       class mpdata_osc<
-        ct_params_t, 
+        ct_params_t,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 3>::type
       > : public detail::mpdata_common<ct_params_t, minhalo>
@@ -36,19 +36,19 @@ namespace libmpdataxx
 
         // member fields
         const rng_t im, jm, km;
-  
+
         void hook_ante_loop(const typename parent_t::advance_arg_t nt)
-        {   
+        {
   //  note that it's not needed for upstream
           parent_t::hook_ante_loop(nt);
           if (opts::isset(ct_params_t::opts, opts::nug))
             this->xchng_sclr(*this->mem->G, this->ijk, this->halo);
-          
+
           // filling Y and Z halos for GC_x, X and Z halos for GC_y, X and Y
           // halos for GC_z
           auto ex = this->halo - 1;
           this->xchng_vctr_nrml(this->mem->GC, this->ijk, ex);
-         
+
           // set time derivatives of GC to zero
           // needed for stationary flows prescribed using the advector method
           if (opts::isset(ct_params_t::opts, opts::div_3rd_dt) || opts::isset(ct_params_t::opts, opts::div_3rd))
@@ -56,7 +56,7 @@ namespace libmpdataxx
             this->mem->ndt_GC[0](this->im + h, this->j, this->k) = 0;
             this->mem->ndt_GC[1](this->i, this->jm + h, this->k) = 0;
             this->mem->ndt_GC[2](this->i, this->j, this->km + h) = 0;
-            
+
             this->mem->ndtt_GC[0](this->im + h, this->j, this->k) = 0;
             this->mem->ndtt_GC[1](this->i, this->jm + h, this->k) = 0;
             this->mem->ndtt_GC[2](this->i, this->j, this->km + h) = 0;
@@ -67,26 +67,26 @@ namespace libmpdataxx
             this->xchng_vctr_nrml(this->mem->ndt_GC, this->ijk, ex);
             this->xchng_vctr_nrml(this->mem->ndtt_GC, this->ijk, ex);
           }
-        } 
+        }
 
         // method invoked by the solver
         void advop(int e)
         {
           this->fct_init(e);
 
-          for (int iter = 0; iter < this->n_iters; ++iter) 
+          for (int iter = 0; iter < this->n_iters; ++iter)
           {
             if (iter != 0)
             {
               this->cycle(e);
               this->xchng(e);
 
-              // calculating the antidiffusive C 
+              // calculating the antidiffusive C
               formulae::mpdata::antidiff<ct_params_t::opts, 0,
                                          static_cast<sptl_intrp_t>(ct_params_t::sptl_intrp),
                                          static_cast<tmprl_extrp_t>(ct_params_t::tmprl_extrp)>(
                 this->GC_corr(iter)[0],
-                this->mem->psi[e][this->n[e]], 
+                this->mem->psi[e][this->n[e]],
                 this->mem->psi[e][this->n[e]-1],
                 this->GC_unco(iter),
                 this->mem->ndt_GC,
@@ -101,8 +101,8 @@ namespace libmpdataxx
                                          static_cast<sptl_intrp_t>(ct_params_t::sptl_intrp),
                                          static_cast<tmprl_extrp_t>(ct_params_t::tmprl_extrp)>(
                 this->GC_corr(iter)[1],
-                this->mem->psi[e][this->n[e]], 
-                this->mem->psi[e][this->n[e]-1], 
+                this->mem->psi[e][this->n[e]],
+                this->mem->psi[e][this->n[e]-1],
                 this->GC_unco(iter),
                 this->mem->ndt_GC,
                 this->mem->ndtt_GC,
@@ -111,13 +111,13 @@ namespace libmpdataxx
                 this->k,
                 this->i
               );
-            
+
               formulae::mpdata::antidiff<ct_params_t::opts, 2,
                                          static_cast<sptl_intrp_t>(ct_params_t::sptl_intrp),
                                          static_cast<tmprl_extrp_t>(ct_params_t::tmprl_extrp)>(
                 this->GC_corr(iter)[2],
-                this->mem->psi[e][this->n[e]], 
-                this->mem->psi[e][this->n[e]-1], 
+                this->mem->psi[e][this->n[e]],
+                this->mem->psi[e][this->n[e]-1],
                 this->GC_unco(iter),
                 this->mem->ndt_GC,
                 this->mem->ndtt_GC,
@@ -126,11 +126,11 @@ namespace libmpdataxx
                 this->i,
                 this->j
               );
-            
+
               if (opts::isset(ct_params_t::opts, opts::div_3rd_dt))
                 this->mem->barrier();
-              
-              // filling Y and Z halos for GC_x, X and Z halos for GC_y, X and Y halos for GC_z 
+
+              // filling Y and Z halos for GC_x, X and Z halos for GC_y, X and Y halos for GC_z
               // needed for calculation of antidiffusive velocities in the third and subsequent
               // iterations, also needed for fct but it is done there independently hence
               // the following check
@@ -166,7 +166,7 @@ namespace libmpdataxx
               assert(iter == 1); // infinite gauge option uses just one corrective step // TODO: not true?
               this->flux_ptr = &GC;
             }
-            
+
             auto &flx = (*(this->flux_ptr));
             this->xchng_flux(flx);
 
@@ -176,13 +176,13 @@ namespace libmpdataxx
             assert(std::isfinite(sum(flx[1](i,   j^h, k  ))));
             assert(std::isfinite(sum(flx[2](i,   j,   k^h))));
 
-            // donor-cell call 
+            // donor-cell call
             // TODO: doing antidiff,upstream,antidiff,upstream (for each dimension separately) could help optimise memory consumption!
             donorcell_sum<ct_params_t::opts>(
               this->mem->khn_tmp,
               ijk,
-              psi[n+1](ijk), 
-              psi[n  ](ijk), 
+              psi[n+1](ijk),
+              psi[n  ](ijk),
               flx[0](i+h, j,   k  ),
               flx[0](i-h, j,   k  ),
               flx[1](i,   j+h, k  ),
@@ -190,8 +190,8 @@ namespace libmpdataxx
               flx[2](i,   j,   k+h),
               flx[2](i,   j,   k-h),
               formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j, k)
-            ); 
-            
+            );
+
             if (this->upwind_filter_freq > 0 && this->timestep % this->upwind_filter_freq == 0)
             {
               break;
@@ -210,23 +210,23 @@ namespace libmpdataxx
           const auto &ijk(this->ijk);
           auto &GC(this->mem->GC);
           using namespace formulae::donorcell;
- 
+
           this->xchng_sclr(field, this->ijk);
- 
+
           // calculation of fluxes
           this->flux[0](im+h, j, k) = make_flux<ct_params_t::opts, 0>(field, GC[0], im, j, k);
           this->flux[1](i, jm+h, k) = make_flux<ct_params_t::opts, 1>(field, GC[1], jm, k, i);
           this->flux[2](i, j, km+h) = make_flux<ct_params_t::opts, 2>(field, GC[2], km, i, j);
- 
+
           this->xchng_flux(this->flux);
- 
+
           // sanity check for input
           assert(std::isfinite(sum(field(ijk))));
           assert(std::isfinite(sum(this->flux[0](i^h, j,   k  ))));
           assert(std::isfinite(sum(this->flux[1](i,   j^h, k  ))));
           assert(std::isfinite(sum(this->flux[2](i,   j,   k^h))));
- 
-          // donor-cell call 
+
+          // donor-cell call
           donorcell_sum<ct_params_t::opts>(
             this->mem->khn_tmp,
             ijk,
@@ -251,7 +251,7 @@ namespace libmpdataxx
         mpdata_osc(
           typename parent_t::ctor_args_t args,
           const typename parent_t::rt_params_t &p
-        ) : 
+        ) :
           parent_t(args, p),
           im(args.i.first() - 1, args.i.last()),
           jm(args.j.first() - 1, args.j.last()),
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp
index f48cd96b..5f382f33 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -31,7 +31,7 @@ namespace libmpdataxx
     namespace detail
     {
       // override default extrapolation/interpolation in ct_params
-      template <class ct_params_t> 
+      template <class ct_params_t>
       struct ct_params_vip_default_t : ct_params_t
       {
         // only override if it has the default value, preserve any special options
@@ -40,7 +40,7 @@ namespace libmpdataxx
         enum {tmprl_extrp = linear2};
       };
 
-      template <class ct_params_t, int minhalo> 
+      template <class ct_params_t, int minhalo>
       class mpdata_rhs_vip_common : public mpdata_rhs<ct_params_vip_default_t<ct_params_t>, minhalo>
       {
         using parent_t = mpdata_rhs<ct_params_vip_default_t<ct_params_t>, minhalo>;
@@ -60,7 +60,7 @@ namespace libmpdataxx
           // t_lev ==  0 -> output for extrapolation/derivatives
           // t_lev == -1 -> (n-1) state
           // t_lev == -2 -> (n-2) state, only available with div3_mpdata
-          
+
           static thread_local arrvec_t<typename parent_t::arr_t> ret;
           ret.resize(parent_t::n_dims);
           for (int d = 0; d < parent_t::n_dims; ++d)
@@ -87,7 +87,7 @@ namespace libmpdataxx
               // and the (n-2) state and juggle them around to avoid array copying
               assert(t_lev == 0 || t_lev == -1 || t_lev == -2);
               auto& comp = (t_lev == 0 ? stash[d] :
-                this->timestep % 2 == 0 ? stash[d - t_lev * ct_params_t::n_dims] : 
+                this->timestep % 2 == 0 ? stash[d - t_lev * ct_params_t::n_dims] :
                   stash[d + (3 + t_lev) * ct_params_t::n_dims]
               );
               ret.replace(ret.begin() + d, this->mem->never_delete(&(comp)));
@@ -102,14 +102,14 @@ namespace libmpdataxx
           int save_t_lev = parent_t::div3_mpdata ? -2 : -1;
           if (ix::vip_den == -1)
             vip_stash(save_t_lev)[d](this->ijk) = vips()[d](this->ijk);
-          else if (eps == 0) // this is the default  
+          else if (eps == 0) // this is the default
           {
             // for those simulations advecting momentum where the division by mass will not cause division by zero
             // (for shallow water simulations it means simulations with no collapsing/inflating shallow water layers)
             vip_stash(save_t_lev)[d](this->ijk) = vips()[d](this->ijk) / this->state(ix::vip_den)(this->ijk);
           }
           else
-          {  
+          {
             vip_stash(save_t_lev)[d](this->ijk) = where(
               // if
               this->state(ix::vip_den)(this->ijk) > eps,
@@ -123,13 +123,13 @@ namespace libmpdataxx
           assert(std::isfinite(sum(vip_stash(save_t_lev)[d](this->ijk))));
         }
 
-        void fill_stash() 
+        void fill_stash()
         {
           for (int d = 0; d < ct_params_t::n_dims; ++d) fill_stash_helper(d);
         }
 
         void extrp(const int d, const int e) // extrapolate velocity field in time to t+1/2
-        {                 
+        {
           using namespace arakawa_c;
 
           const auto beta = this->dt_stash[0] > 0 ? this->dt / (2 * this->dt_stash[0]) : 0;
@@ -143,13 +143,13 @@ namespace libmpdataxx
             this->vip_stash(0)[d](this->ijk) = -beta * this->vip_stash(-1)[d](this->ijk);
           }
 
-          if (ix::vip_den == -1) 
+          if (ix::vip_den == -1)
             this->vip_stash(0)[d](this->ijk) += (1 + beta) * this->state(e)(this->ijk);
           else if (eps == 0) //this is the default
-          {             
+          {
             // for those simulations advecting momentum where the division by mass will not cause division by zero
             // (for shallow water simulations it means simulations with no collapsing/inflating shallow water layers)
-            this->vip_stash(0)[d](this->ijk) += (1 + beta) * (this->state(e)(this->ijk) / this->state(ix::vip_den)(this->ijk)); 
+            this->vip_stash(0)[d](this->ijk) += (1 + beta) * (this->state(e)(this->ijk) / this->state(ix::vip_den)(this->ijk));
           }
           else
           {
@@ -159,12 +159,12 @@ namespace libmpdataxx
               // then
               (1 + beta) * this->state(e)(this->ijk) / this->state(ix::vip_den)(this->ijk),
               // else
-              0   
-            );  
+              0
+            );
           }
 
           assert(std::isfinite(sum(this->vip_stash(0)[d](this->ijk))));
-        }   
+        }
 
         arrvec_t<typename parent_t::arr_t>& vips()
         {
@@ -185,7 +185,7 @@ namespace libmpdataxx
           {
             if (static_cast<vip_vab_t>(ct_params_t::vip_vab) == impl)
             {
-              vip_rhs[d](this->ijk) = - 
+              vip_rhs[d](this->ijk) = -
                 (*this->mem->vab_coeff)(this->ijk) * (vips()[d](this->ijk) - this->mem->vab_relax[d](this->ijk));
             }
             else
@@ -194,7 +194,7 @@ namespace libmpdataxx
             }
           }
         }
-        
+
         virtual void vip_rhs_expl_calc()
         {
           if (static_cast<vip_vab_t>(ct_params_t::vip_vab) == expl)
@@ -202,12 +202,12 @@ namespace libmpdataxx
             for (int d = 0; d < parent_t::n_dims; ++d)
             {
               // factor of 2 because it is multiplied by 0.5 * dt in vip_rhs_apply
-              vip_rhs[d](this->ijk) += -2 * 
+              vip_rhs[d](this->ijk) += -2 *
                 (*this->mem->vab_coeff)(this->ijk) * (vips()[d](this->ijk) - this->mem->vab_relax[d](this->ijk));
             }
           }
         }
-        
+
         virtual void vip_rhs_impl_fnlz()
         {
           if (static_cast<vip_vab_t>(ct_params_t::vip_vab) == impl)
@@ -225,9 +225,9 @@ namespace libmpdataxx
             }
           }
         }
-        
+
         void vip_rhs_apply()
-        {    
+        {
           for (int d = 0; d < parent_t::n_dims; ++d)
           {
             vips()[d](this->ijk) += real_t(0.5) * this->dt * vip_rhs[d](this->ijk);
@@ -245,7 +245,7 @@ namespace libmpdataxx
             }
           }
         }
-        
+
         void add_relax()
         {
           for (int d = 0; d < parent_t::n_dims; ++d)
@@ -259,7 +259,7 @@ namespace libmpdataxx
         {
           // fill Courant numbers with zeros so that the divergence test does no harm
           if (this->rank == 0)
-            for (int d=0; d < parent_t::n_dims; ++d) this->mem->GC.at(d) = 0; 
+            for (int d=0; d < parent_t::n_dims; ++d) this->mem->GC.at(d) = 0;
           this->mem->barrier();
 
           for (int d = 0; d < parent_t::n_dims; ++d)
@@ -271,7 +271,7 @@ namespace libmpdataxx
           }
 
           parent_t::hook_ante_loop(nt);
-          
+
           vip_rhs_impl_init();
         }
 
@@ -288,7 +288,7 @@ namespace libmpdataxx
 
           return true;
         }
-        
+
         void calc_ndt_gc() final
         {
           if (parent_t::div3_mpdata)
@@ -304,7 +304,7 @@ namespace libmpdataxx
               interpolate_in_space(this->mem->ndt_GC, vip_stash(0));
               this->mem->barrier();
             }
-            
+
             if (this->dt_stash[0] > 0 && this->dt_stash[1] > 0)
             {
               for (int d = 0; d < parent_t::n_dims; ++d)
@@ -325,8 +325,8 @@ namespace libmpdataxx
         }
 
         void hook_ante_step()
-        { 
-          // filling the stash with data from current velocity field 
+        {
+          // filling the stash with data from current velocity field
           // (so that in the next time step they can be used for extrapolation in time)
           fill_stash();
 
@@ -336,25 +336,25 @@ namespace libmpdataxx
           // finish calculating velocity forces before moving on
           this->mem->barrier();
 
-          parent_t::hook_ante_step(); 
+          parent_t::hook_ante_step();
           vip_rhs_apply();
         }
-        
+
         void hook_post_step()
-        { 
-          parent_t::hook_post_step(); 
+        {
+          parent_t::hook_post_step();
           vip_rhs_impl_fnlz();
         }
 
         public:
-        
+
         struct rt_params_t : parent_t::rt_params_t
         {
           real_t vip_eps = 0;
         };
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
@@ -362,14 +362,14 @@ namespace libmpdataxx
             (parent_t::div3_mpdata ? 3 : ct_params_t::var_dt ? 2 : 1) * parent_t::n_dims); // stash
           parent_t::alloc_tmp_sclr(mem, __FILE__, parent_t::n_dims); // vip_rhs
         }
- 
+
         protected:
 
         // ctor
         mpdata_rhs_vip_common(
           typename parent_t::ctor_args_t args,
           const rt_params_t &p
-        ) : 
+        ) :
           parent_t(args, p),
           stash(args.mem->tmp[__FILE__][0]),
           vip_rhs(args.mem->tmp[__FILE__][1]),
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp
index cd568ad5..d014a882 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -8,7 +8,7 @@
 #pragma once
 
 #include <libmpdata++/formulae/nabla_formulae.hpp>
-#include <libmpdata++/solvers/mpdata_rhs_vip.hpp> 
+#include <libmpdata++/solvers/mpdata_rhs_vip.hpp>
 
 namespace libmpdataxx
 {
@@ -48,9 +48,9 @@ namespace libmpdataxx
         }
 
         auto lap(
-          arr_t &arr, 
-          const ijk_t &ijk, 
-          const std::array<real_t, parent_t::n_dims>& dijk, 
+          arr_t &arr,
+          const ijk_t &ijk,
+          const std::array<real_t, parent_t::n_dims>& dijk,
           bool err_init, // if true then subtract initial state for error calculation
           bool simple // if true do not normalize gradients (simple laplacian)
         ) return_macro(
@@ -82,7 +82,7 @@ namespace libmpdataxx
         )
 
         void ini_pressure()
-        { 
+        {
           Phi(this->ijk) = 0;
     int npoints = 1;
     for (int d = 0; d < parent_t::n_dims; ++d)
@@ -90,7 +90,7 @@ namespace libmpdataxx
             Phi(this->ijk) -= real_t(0.5) * pow2(this->vips()[d](this->ijk));
       npoints *= (this->mem->distmem.grid_size[d]);
     }
-        
+
     auto Phi_mean = prs_sum(Phi, this->ijk) / npoints;
           Phi(this->ijk) -= Phi_mean;
         }
@@ -105,7 +105,7 @@ namespace libmpdataxx
             tmp_uvw[d](this->ijk) = this->vips()[d](this->ijk);
           }
 
-          //initial error   
+          //initial error
           err(this->ijk) = lap(Phi, this->ijk, this->dijk, true, simple);
 
           iters = 0;
@@ -142,7 +142,7 @@ namespace libmpdataxx
         {
           // save initial edge velocities
           this->save_edges(this->vips(), this->ijk);
-          
+
           // correct initial velocity
           Phi(this->ijk) = real_t(0);
           this->xchng_pres(Phi, this->ijk);
@@ -153,12 +153,12 @@ namespace libmpdataxx
           formulae::nabla::calc_grad<parent_t::n_dims>(tmp_uvw, Phi, this->ijk, this->dijk);
           pressure_solver_apply();
           this->set_edges(this->vips(), this->ijk, 1);
-          
+
           parent_t::hook_ante_loop(nt);
 
           // potential pressure
           ini_pressure();
- 
+
           // allow pressure_solver_apply at the first time step
           this->xchng_pres(this->Phi, this->ijk);
           formulae::nabla::calc_grad<parent_t::n_dims>(tmp_uvw, Phi, this->ijk, this->dijk);
@@ -190,8 +190,8 @@ namespace libmpdataxx
 
         public:
 
-        struct rt_params_t : parent_t::rt_params_t 
-        { 
+        struct rt_params_t : parent_t::rt_params_t
+        {
           real_t prs_tol;
         };
 
@@ -199,7 +199,7 @@ namespace libmpdataxx
         mpdata_rhs_vip_prs_common(
           typename parent_t::ctor_args_t args,
           const rt_params_t &p
-        ) : 
+        ) :
           parent_t(args, p),
           prs_tol(p.prs_tol),
           err_tol(p.prs_tol / this->dt), // make stopping criterion correspond to dimensionless divergence
@@ -207,10 +207,10 @@ namespace libmpdataxx
                err(args.mem->tmp[__FILE__][0][1]),
            tmp_uvw(args.mem->tmp[__FILE__][1]),
            lap_tmp(args.mem->tmp[__FILE__][2])
-        {} 
+        {}
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
@@ -218,7 +218,7 @@ namespace libmpdataxx
           parent_t::alloc_tmp_sclr(mem, __FILE__, parent_t::n_dims); // tmp_uvw
           parent_t::alloc_tmp_sclr(mem, __FILE__, parent_t::n_dims); // lap_tmp
         }
-      }; 
+      };
     } // namespace detail
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_gcrk.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_gcrk.hpp
index d5c4d79a..141fcc46 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_gcrk.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_gcrk.hpp
@@ -1,12 +1,12 @@
-/** 
+/**
   * @file
   * @copyright University of Warsaw
   * @section LICENSE
   * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
   *
-  * @brief generalized conjugate residual pressure solver 
-  *   (for more detailed discussion consult Smolarkiewicz & Margolin 1994 
-  *  Appl. Math and Comp. Sci. 
+  * @brief generalized conjugate residual pressure solver
+  *   (for more detailed discussion consult Smolarkiewicz & Margolin 1994
+  *  Appl. Math and Comp. Sci.
   *  Variational solver for elliptic problems in atmospheric flows)
   */
 
@@ -35,7 +35,7 @@ namespace libmpdataxx
         std::vector<real_t> alpha, tmp_den;
         typename parent_t::arr_t lap_err;
         arrvec_t<typename parent_t::arr_t> p_err, lap_p_err;
-        
+
         void pressure_solver_loop_init(bool simple) final
         {
           p_err[0](this->ijk) = this->err(this->ijk);
@@ -52,7 +52,7 @@ namespace libmpdataxx
             this->err(this->ijk) += beta * lap_p_err[v](this->ijk);
 
             real_t error = std::max(
-              std::abs(this->mem->max(this->rank, this->err(this->ijk))), 
+              std::abs(this->mem->max(this->rank, this->err(this->ijk))),
               std::abs(this->mem->min(this->rank, this->err(this->ijk)))
             );
 
@@ -62,15 +62,15 @@ namespace libmpdataxx
 
             for (int l = 0; l <= v; ++l)
             {
-              if (tmp_den[l] != 0) 
+              if (tmp_den[l] != 0)
                 alpha[l] = - this->prs_sum(lap_err, lap_p_err[l], this->ijk) / tmp_den[l];
             }
-            
+
             if (v < (k_iters - 1))
             {
-              p_err[v + 1](this->ijk) = this->err(this->ijk);  
+              p_err[v + 1](this->ijk) = this->err(this->ijk);
               lap_p_err[v + 1](this->ijk) = lap_err(this->ijk);
-              
+
               for (int l = 0; l <= v; ++l)
               {
                 p_err[v + 1](this->ijk) += alpha[l] * p_err[l](this->ijk);
@@ -80,7 +80,7 @@ namespace libmpdataxx
             }
             else
             {
-              p_err[0](this->ijk) = this->err(this->ijk) + alpha[0] * p_err[0](this->ijk);  
+              p_err[0](this->ijk) = this->err(this->ijk) + alpha[0] * p_err[0](this->ijk);
               lap_p_err[0](this->ijk) = lap_err(this->ijk) + alpha[0] * lap_p_err[0](this->ijk);
               for (int l = 1; l <= v; ++l)
               {
@@ -110,7 +110,7 @@ namespace libmpdataxx
         {}
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
@@ -118,7 +118,7 @@ namespace libmpdataxx
           parent_t::alloc_tmp_sclr(mem, __FILE__, k_iters);
           parent_t::alloc_tmp_sclr(mem, __FILE__, k_iters);
         }
-      }; 
+      };
     } // namespace detail
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_mr.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_mr.hpp
index e27cf61f..fbf0db22 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_mr.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_mr.hpp
@@ -1,12 +1,12 @@
-/** 
+/**
   * @file
   * @copyright University of Warsaw
   * @section LICENSE
   * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
   *
-  * @brief minimum residual pressure solver 
-  *   (for more detailed discussion consult Smolarkiewicz & Margolin 1994 
-  *  Appl. Math and Comp. Sci. 
+  * @brief minimum residual pressure solver
+  *   (for more detailed discussion consult Smolarkiewicz & Margolin 1994
+  *  Appl. Math and Comp. Sci.
   *  Variational solver for elliptic problems in atmospheric flows)
 */
 
@@ -48,7 +48,7 @@ namespace libmpdataxx
           this->err(this->ijk) += beta * this->lap_err(this->ijk);
 
           real_t error = std::max(
-            std::abs(this->mem->max(this->rank, this->err(this->ijk))), 
+            std::abs(this->mem->max(this->rank, this->err(this->ijk))),
             std::abs(this->mem->min(this->rank, this->err(this->ijk)))
           );
 
@@ -71,13 +71,13 @@ namespace libmpdataxx
         {}
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
           parent_t::alloc_tmp_sclr(mem, __FILE__, 1);
         }
-      }; 
+      };
     } // namespace detail
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_pc.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_pc.hpp
index 87d6770e..3c24e1c6 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_pc.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_pc.hpp
@@ -1,10 +1,10 @@
-/** 
+/**
   * @file
   * @copyright University of Warsaw
   * @section LICENSE
   * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
   *
-  * @brief preconditioned conjugate residual pressure solver 
+  * @brief preconditioned conjugate residual pressure solver
   *   (for more detailed discussion consult Smolarkiewicz & Szmelter 2011
   *    A Nonhydrostatic Unstructured-Mesh Soundproof Model for Simulation of Internal Gravity Waves
   *    Acta Geophysica)
@@ -24,7 +24,7 @@ namespace libmpdataxx
       class mpdata_rhs_vip_prs_pc : public detail::mpdata_rhs_vip_prs_common<ct_params_t, minhalo>
       {
         public:
-        
+
         using real_t = typename ct_params_t::real_t;
 
         private:
@@ -42,10 +42,10 @@ namespace libmpdataxx
           //initail q_err for preconditioner
           q_err(this->ijk) = real_t(0);
 
-          //initail preconditioner error   
+          //initail preconditioner error
           this->pcnd_err(this->ijk) = this->lap(this->q_err, this->ijk, this->dijk, false, simple) - this->err(this->ijk);
             //TODO does it change with non_const density?
-          
+
           assert(pc_iters >= 0 && pc_iters < 10 && "params.pc_iters not specified?");
           for (int it=0; it<=pc_iters; it++)
           {
@@ -65,12 +65,12 @@ namespace libmpdataxx
         {
           tmp_den = this->prs_sum(lap_p_err, lap_p_err, this->ijk);
           if (tmp_den != 0) beta = -this->prs_sum(this->err, lap_p_err, this->ijk) / tmp_den;
- 
+
           this->Phi(this->ijk) += beta * p_err(this->ijk);
           this->err(this->ijk) += beta * lap_p_err(this->ijk);
 
           real_t error = std::max(
-            std::abs(this->mem->max(this->rank, this->err(this->ijk))), 
+            std::abs(this->mem->max(this->rank, this->err(this->ijk))),
             std::abs(this->mem->min(this->rank, this->err(this->ijk)))
           );
 
@@ -83,8 +83,8 @@ namespace libmpdataxx
           if (tmp_den != 0) alpha = -this->prs_sum(lap_q_err, lap_p_err, this->ijk) / tmp_den;
 
           p_err(this->ijk) *= alpha;
-          p_err(this->ijk) += q_err(this->ijk);  
- 
+          p_err(this->ijk) += q_err(this->ijk);
+
           lap_p_err(this->ijk) *= alpha;
           lap_p_err(this->ijk) += lap_q_err(this->ijk);
         }
@@ -111,13 +111,13 @@ namespace libmpdataxx
         {}
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
           parent_t::alloc_tmp_sclr(mem, __FILE__, 5);
         }
-      }; 
+      };
     } // namespcae detail
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp
index d3e318ff..4292e670 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp
@@ -94,7 +94,7 @@ namespace libmpdataxx
           // TODO: get rid of superfluous barriers
           for (auto& vip : this->vips())
             this->xchng_sclr(vip, this->ijk, 1);
-          
+
           if (static_cast<stress_diff_t>(ct_params_t::stress_diff) == compact)
           {
             calc_drag_cmpct();
@@ -115,10 +115,10 @@ namespace libmpdataxx
 
             this->xchng_sgs_tnsr_diag(tau, this->vips()[ct_params_t::n_dims - 1], vip_div, this->ijk);
             this->xchng_sgs_tnsr_offdiag(tau, tau_srfc, this->ijk, this->ijkm);
-            
+
             // multiply deformation tensor by sgs viscosity to obtain stress tensor
             multiply_sgs_visc();
-            
+
             // update forces
             formulae::stress::calc_stress_rhs_cmpct<ct_params_t::n_dims, ct_params_t::opts>(this->vip_rhs,
                                                                                             tau,
@@ -138,13 +138,13 @@ namespace libmpdataxx
               for (int d = 0; d < std::pow(static_cast<int>(ct_params_t::n_dims), 2); ++d)
                 pade_deriv(d);
             }
-            
+
             // calculate independent components of deformation tensor
             formulae::stress::calc_deform<ct_params_t::n_dims>(tau, drv, this->ijk);
-            
+
             // multiply deformation tensor by sgs viscosity to obtain stress tensor
             multiply_sgs_visc();
-            
+
             // TODO: get rid of superfluous barriers
             for (auto& t : tau)
             {
@@ -167,9 +167,9 @@ namespace libmpdataxx
 
         public:
 
-        struct rt_params_t : parent_t::rt_params_t 
-        { 
-          real_t cdrag = 0; 
+        struct rt_params_t : parent_t::rt_params_t
+        {
+          real_t cdrag = 0;
         };
 
         // ctor
@@ -195,11 +195,11 @@ namespace libmpdataxx
         }
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
-          // no staggering for non-compact differencing  
+          // no staggering for non-compact differencing
           if (static_cast<stress_diff_t>(ct_params_t::stress_diff) != compact)
           {
             parent_t::alloc_tmp_sclr(mem, __FILE__, 3 * (ct_params_t::n_dims - 1)); // unique strain rate tensor elements
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_dns.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_dns.hpp
index afdc3e27..23f4ce4f 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_dns.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_dns.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
   * @file
   * @copyright University of Warsaw
   * @section LICENSE
@@ -20,7 +20,7 @@ namespace libmpdataxx
         using parent_t = detail::mpdata_rhs_vip_prs_sgs_common<ct_params_t, minhalo>;
 
         protected:
-        
+
         typename ct_params_t::real_t eta;
 
         void multiply_sgs_visc() final
@@ -58,7 +58,7 @@ namespace libmpdataxx
         {
           if (eta == 0) throw std::runtime_error("eta == 0");
         }
-      }; 
+      };
     } // namespace detail
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_smg.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_smg.hpp
index bc5e0f6e..d8c57950 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_smg.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_smg.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
   * @file
   * @copyright University of Warsaw
   * @section LICENSE
@@ -24,7 +24,7 @@ namespace libmpdataxx
         using real_t = typename ct_params_t::real_t;
 
         protected:
-        
+
         real_t smg_c, c_m;
         typename parent_t::arr_t &k_m;
 
@@ -77,13 +77,13 @@ namespace libmpdataxx
         }
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           parent_t::alloc(mem, n_iters);
           parent_t::alloc_tmp_sclr(mem, __FILE__, 1); // k_m
         }
-      }; 
+      };
     } // namespace detail
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/solver_1d.hpp b/libmpdata++/solvers/detail/solver_1d.hpp
index 9eb96d3e..ff8c0890 100644
--- a/libmpdata++/solvers/detail/solver_1d.hpp
+++ b/libmpdata++/solvers/detail/solver_1d.hpp
@@ -17,8 +17,8 @@ namespace libmpdataxx
     {
       template<typename ct_params_t, int n_tlev, int minhalo>
       class solver<
-        ct_params_t, 
-        n_tlev, 
+        ct_params_t,
+        n_tlev,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 1 >::type
       > : public solver_common<ct_params_t, n_tlev, minhalo>
@@ -60,7 +60,7 @@ namespace libmpdataxx
           this->mem->barrier();
           if (!cyclic)
           {
-            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, ad); 
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, ad);
           }
           else
           {
@@ -74,7 +74,7 @@ namespace libmpdataxx
           stat_field(this->ijk) = real_t(0.5) * (abs(arrvec[0](i+h) + arrvec[0](i-h)));
           return this->mem->max(this->rank, stat_field(this->ijk));
         }
-        
+
         real_t max_abs_vctr_div(const arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
           stat_field(this->ijk) = abs((arrvec[0](i+h) - arrvec[0](i-h)));
@@ -90,14 +90,14 @@ namespace libmpdataxx
         }
 
         public:
- 
+
         struct ctor_args_t
         {
           // <TODO> these should be common for 1D,2D,3D
           int rank;
           typename parent_t::mem_t *mem;
           // </TODO>
-          typename parent_t::bcp_t &bcxl, &bcxr; 
+          typename parent_t::bcp_t &bcxl, &bcxr;
           const rng_t &i;
         };
 
@@ -115,10 +115,10 @@ namespace libmpdataxx
         ) :
           parent_t(
             args.rank,
-            args.mem, 
-            p, 
+            args.mem,
+            p,
             idx_t<parent_t::n_dims>(args.i)
-          ), 
+          ),
           i(args.i),
           stat_field(args.mem->tmp[__FILE__][0][0])
         {
@@ -132,14 +132,14 @@ namespace libmpdataxx
         private:
 
         static void alloc_tmp(
-          typename parent_t::mem_t *mem, 
-          const char * __file__, 
+          typename parent_t::mem_t *mem,
+          const char * __file__,
           const int n_arr,
           const rng_t rng,
           std::string name = ""
         )
         {
-          mem->tmp[__file__].push_back(new arrvec_t<typename parent_t::arr_t>()); 
+          mem->tmp[__file__].push_back(new arrvec_t<typename parent_t::arr_t>());
 
           if (!name.empty()) mem->avail_tmp[name] = std::make_pair(__file__, mem->tmp[__file__].size() - 1);
 
@@ -147,22 +147,22 @@ namespace libmpdataxx
           {
             mem->tmp[__file__].back().push_back(
               mem->old(new typename parent_t::arr_t( rng ))
-            ); 
+            );
           }
         }
 
         public:
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
           mem->psi.resize(parent_t::n_eqns);
           for (int e = 0; e < parent_t::n_eqns; ++e) // equations
             for (int n = 0; n < n_tlev; ++n) // time levels
               mem->psi[e].push_back(mem->old(new typename parent_t::arr_t(parent_t::rng_sclr(mem->grid_size[0]))));
-    
-          mem->GC.push_back(mem->old(new typename parent_t::arr_t(parent_t::rng_vctr(mem->grid_size[0])))); 
+
+          mem->GC.push_back(mem->old(new typename parent_t::arr_t(parent_t::rng_vctr(mem->grid_size[0]))));
 
           // fully third-order accurate mpdata needs also time derivatives of
           // the Courant field
@@ -179,33 +179,33 @@ namespace libmpdataxx
 
           // allocate Kahan summation temporary vars
           if (opts::isset(ct_params_t::opts, opts::khn))
-            for (int n = 0; n < 3; ++n) 
-              mem->khn_tmp.push_back(mem->old(new typename parent_t::arr_t( 
+            for (int n = 0; n < 3; ++n)
+              mem->khn_tmp.push_back(mem->old(new typename parent_t::arr_t(
                 parent_t::rng_sclr(mem->grid_size[0])
               )));
 
           // courant field
           alloc_tmp_sclr(mem, __FILE__, 1);
-        } 
+        }
 
         protected:
 
         // helper method to allocate a vector-component temporary array
         static void alloc_tmp_vctr(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const char * __file__
         )
         {
           alloc_tmp(mem, __file__, 1, parent_t::rng_vctr(mem->grid_size[0])); // always one-component vectors
         }
 
-        // helper method to allocate n_arr scalar temporary arrays 
+        // helper method to allocate n_arr scalar temporary arrays
         static void alloc_tmp_sclr(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const char * __file__, const int n_arr
         )
         {
-          alloc_tmp(mem, __file__, n_arr, parent_t::rng_sclr(mem->grid_size[0])); 
+          alloc_tmp(mem, __file__, n_arr, parent_t::rng_sclr(mem->grid_size[0]));
         }
       };
     } // namespace detail
diff --git a/libmpdata++/solvers/detail/solver_2d.hpp b/libmpdata++/solvers/detail/solver_2d.hpp
index 31f86a26..59ad0107 100644
--- a/libmpdata++/solvers/detail/solver_2d.hpp
+++ b/libmpdata++/solvers/detail/solver_2d.hpp
@@ -1,4 +1,4 @@
-/** @file 
+/** @file
 * @copyright University of Warsaw
 * @section LICENSE
 * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
@@ -18,8 +18,8 @@ namespace libmpdataxx
 
       template<typename ct_params_t, int n_tlev, int minhalo>
       class solver<
-        ct_params_t, 
-        n_tlev,  
+        ct_params_t,
+        n_tlev,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 2 >::type
       > : public solver_common<ct_params_t, n_tlev, minhalo>
@@ -31,7 +31,7 @@ namespace libmpdataxx
         using real_t = typename ct_params_t::real_t;
 
         protected:
-      
+
         const rng_t i, j; // TODO: to be removed
 
         // generic field used for various statistics (currently Courant number and divergence)
@@ -71,7 +71,7 @@ namespace libmpdataxx
           // TODO: open bc nust be last!!!
           this->mem->barrier();
         }
-        
+
         virtual void xchng_flux(arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
           this->mem->barrier();
@@ -79,7 +79,7 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[1]) bc->fill_halos_flux(arrvec, i);
           this->mem->barrier();
         }
-        
+
         virtual void xchng_sgs_div(
           typename parent_t::arr_t &arr,
           const idx_t<2> &range_ijk
@@ -90,7 +90,7 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[1]) bc->fill_halos_sgs_div(arr, range_ijk[0]);
           this->mem->barrier();
         }
-        
+
         virtual void xchng_sgs_vctr(arrvec_t<typename parent_t::arr_t> &av,
                             const typename parent_t::arr_t &b,
                             const idx_t<2> &range_ijk
@@ -115,7 +115,7 @@ namespace libmpdataxx
         }
 
         virtual void xchng_sgs_tnsr_offdiag(arrvec_t<typename parent_t::arr_t> &av,
-                                            const arrvec_t<typename parent_t::arr_t> &bv, 
+                                            const arrvec_t<typename parent_t::arr_t> &bv,
                                             const idx_t<2> &range_ijk,
                                             const std::array<rng_t, 2> &range_ijkm
         ) final
@@ -129,7 +129,7 @@ namespace libmpdataxx
         }
 
         virtual void xchng_vctr_nrml(
-          arrvec_t<typename parent_t::arr_t> &arrvec, 
+          arrvec_t<typename parent_t::arr_t> &arrvec,
           const idx_t<2> &range_ijk,
           const int ext = 0,
           const bool cyclic = false
@@ -197,7 +197,7 @@ namespace libmpdataxx
           {
             real_t max_abs_div = max_abs_vctr_div(this->mem->GC);
 
-            if (max_abs_div > this->max_abs_div_eps) 
+            if (max_abs_div > this->max_abs_div_eps)
               throw std::runtime_error("initial advector field is divergent");
           }
         }
@@ -210,7 +210,7 @@ namespace libmpdataxx
                                         ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j);
           return this->mem->max(this->rank, stat_field(this->ijk));
         }
-        
+
         real_t max_abs_vctr_div(const arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
           stat_field(this->ijk) = abs(
@@ -219,7 +219,7 @@ namespace libmpdataxx
                                      ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j);
           return this->mem->max(this->rank, stat_field(this->ijk));
         }
-        
+
         void scale_gc(const real_t time,
                       const real_t cur_dt,
                       const real_t old_dt) final
@@ -232,17 +232,17 @@ namespace libmpdataxx
         }
 
         public:
- 
+
         struct ctor_args_t
-        {   
+        {
           // <TODO> these should be common for 1D,2D,3D
           int rank;
           typename parent_t::mem_t *mem;
           // </TODO>
-          typename parent_t::bcp_t &bcxl, &bcxr, &bcyl, &bcyr; 
-          const rng_t &i, &j; 
-        };  
-        
+          typename parent_t::bcp_t &bcxl, &bcxr, &bcyl, &bcyr;
+          const rng_t &i, &j;
+        };
+
         struct rt_params_t : parent_t::rt_params_t
         {
           real_t di = 0, dj = 0;
@@ -257,18 +257,18 @@ namespace libmpdataxx
         ) :
           parent_t(
             args.rank,
-            args.mem, 
-            p, 
+            args.mem,
+            p,
             idx_t<parent_t::n_dims>({args.i, args.j})
           ),
-          i(args.i), 
+          i(args.i),
           j(args.j),
           stat_field(args.mem->tmp[__FILE__][0][0])
         {
           this->di = p.di;
           this->dj = p.dj;
           this->dijk = {p.di, p.dj};
-          this->set_bcs(0, args.bcxl, args.bcxr); 
+          this->set_bcs(0, args.bcxl, args.bcxr);
           this->set_bcs(1, args.bcyl, args.bcyr);
         }
 
@@ -277,26 +277,26 @@ namespace libmpdataxx
         public:
 
         static void alloc(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const int &n_iters
         ) {
-          // psi 
+          // psi
           mem->psi.resize(parent_t::n_eqns);
           for (int e = 0; e < parent_t::n_eqns; ++e) // equations
             for (int n = 0; n < n_tlev; ++n) // time levels
-              mem->psi[e].push_back(mem->old(new typename parent_t::arr_t( 
-                parent_t::rng_sclr(mem->grid_size[0]), 
+              mem->psi[e].push_back(mem->old(new typename parent_t::arr_t(
+                parent_t::rng_sclr(mem->grid_size[0]),
                 parent_t::rng_sclr(mem->grid_size[1])
               )));
 
           // Courant field components (Arakawa-C grid)
-          mem->GC.push_back(mem->old(new typename parent_t::arr_t( 
-            parent_t::rng_vctr(mem->grid_size[0]), 
-            parent_t::rng_sclr(mem->grid_size[1]) 
+          mem->GC.push_back(mem->old(new typename parent_t::arr_t(
+            parent_t::rng_vctr(mem->grid_size[0]),
+            parent_t::rng_sclr(mem->grid_size[1])
           )));
-          mem->GC.push_back(mem->old(new typename parent_t::arr_t( 
-            parent_t::rng_sclr(mem->grid_size[0]), 
-            parent_t::rng_vctr(mem->grid_size[1]) 
+          mem->GC.push_back(mem->old(new typename parent_t::arr_t(
+            parent_t::rng_sclr(mem->grid_size[0]),
+            parent_t::rng_vctr(mem->grid_size[1])
           )));
 
           // fully third-order accurate mpdata needs also time derivatives of
@@ -305,24 +305,24 @@ namespace libmpdataxx
               opts::isset(ct_params_t::opts, opts::div_3rd_dt))
           {
             // TODO: why for (auto f : {mem->ndt_GC, mem->ndtt_GC}) doesn't work ?
-            mem->ndt_GC.push_back(mem->old(new typename parent_t::arr_t( 
-              parent_t::rng_vctr(mem->grid_size[0]), 
-              parent_t::rng_sclr(mem->grid_size[1]) 
+            mem->ndt_GC.push_back(mem->old(new typename parent_t::arr_t(
+              parent_t::rng_vctr(mem->grid_size[0]),
+              parent_t::rng_sclr(mem->grid_size[1])
             )));
-            mem->ndt_GC.push_back(mem->old(new typename parent_t::arr_t( 
-              parent_t::rng_sclr(mem->grid_size[0]), 
-              parent_t::rng_vctr(mem->grid_size[1]) 
+            mem->ndt_GC.push_back(mem->old(new typename parent_t::arr_t(
+              parent_t::rng_sclr(mem->grid_size[0]),
+              parent_t::rng_vctr(mem->grid_size[1])
             )));
-            mem->ndtt_GC.push_back(mem->old(new typename parent_t::arr_t( 
-              parent_t::rng_vctr(mem->grid_size[0]), 
-              parent_t::rng_sclr(mem->grid_size[1]) 
+            mem->ndtt_GC.push_back(mem->old(new typename parent_t::arr_t(
+              parent_t::rng_vctr(mem->grid_size[0]),
+              parent_t::rng_sclr(mem->grid_size[1])
             )));
-            mem->ndtt_GC.push_back(mem->old(new typename parent_t::arr_t( 
-              parent_t::rng_sclr(mem->grid_size[0]), 
-              parent_t::rng_vctr(mem->grid_size[1]) 
+            mem->ndtt_GC.push_back(mem->old(new typename parent_t::arr_t(
+              parent_t::rng_sclr(mem->grid_size[0]),
+              parent_t::rng_vctr(mem->grid_size[1])
             )));
           }
- 
+
           // allocate G
           if (opts::isset(ct_params_t::opts, opts::nug))
             mem->G.reset(mem->old(new typename parent_t::arr_t(
@@ -332,9 +332,9 @@ namespace libmpdataxx
 
           // allocate Kahan summation temporary vars
           if (opts::isset(ct_params_t::opts, opts::khn))
-            for (int n = 0; n < 3; ++n) 
-              mem->khn_tmp.push_back(mem->old(new typename parent_t::arr_t( 
-                parent_t::rng_sclr(mem->grid_size[0]), 
+            for (int n = 0; n < 3; ++n)
+              mem->khn_tmp.push_back(mem->old(new typename parent_t::arr_t(
+                parent_t::rng_sclr(mem->grid_size[0]),
                 parent_t::rng_sclr(mem->grid_size[1])
               )));
           // courant field
@@ -360,10 +360,10 @@ namespace libmpdataxx
               srfc ? rng_t(0, 0) :
                 stgr[n][1] ? parent_t::rng_vctr(mem->grid_size[1]) :
                   parent_t::rng_sclr(mem->grid_size[1])
-            ))); 
+            )));
           }
         }
-        
+
         // helper method to allocate a temporary space composed of vector-component arrays
         static void alloc_tmp_vctr(
           typename parent_t::mem_t *mem,
@@ -373,24 +373,24 @@ namespace libmpdataxx
           alloc_tmp_stgr(mem, __file__, 2, {{true, false}, {false, true}});
         }
 
-        // helper method to allocate n_arr scalar temporary arrays 
+        // helper method to allocate n_arr scalar temporary arrays
         static void alloc_tmp_sclr(
-          typename parent_t::mem_t *mem, 
+          typename parent_t::mem_t *mem,
           const char * __file__, const int n_arr,
           std::string name = "",
           bool srfc = false
-        )   
-        {   
+        )
+        {
           mem->tmp[__file__].push_back(new arrvec_t<typename parent_t::arr_t>());
 
           if (!name.empty()) mem->avail_tmp[name] = std::make_pair(__file__, mem->tmp[__file__].size() - 1);
 
           for (int n = 0; n < n_arr; ++n)
-            mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t( 
+            mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t(
               parent_t::rng_sclr(mem->grid_size[0]),
               srfc ? rng_t(0, 0) : parent_t::rng_sclr(mem->grid_size[1])
             )));
-        } 
+        }
       };
     } // namespace detail
   } // namespace solvers
diff --git a/libmpdata++/solvers/detail/solver_3d.hpp b/libmpdata++/solvers/detail/solver_3d.hpp
index d0a81f18..bbf041d7 100644
--- a/libmpdata++/solvers/detail/solver_3d.hpp
+++ b/libmpdata++/solvers/detail/solver_3d.hpp
@@ -15,11 +15,11 @@ namespace libmpdataxx
     namespace detail
     {
       using namespace arakawa_c;
-    
+
       template<typename ct_params_t, int n_tlev, int minhalo>
       class solver<
-        ct_params_t, 
-        n_tlev, 
+        ct_params_t,
+        n_tlev,
         minhalo,
         typename std::enable_if<ct_params_t::n_dims == 3 >::type
       > : public solver_common<ct_params_t, n_tlev, minhalo>
@@ -60,19 +60,19 @@ namespace libmpdataxx
           this->mem->barrier();
           if (!cyclic)
           {
-            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, j, k, ad); 
-            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng(arrvec, k, i, ad); 
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, j, k, ad);
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng(arrvec, k, i, ad);
             for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_alng(arrvec, i, j, ad);
           }
           else
           {
-            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng_cyclic(arrvec, j, k, ad); 
-            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng_cyclic(arrvec, k, i, ad); 
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng_cyclic(arrvec, j, k, ad);
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng_cyclic(arrvec, k, i, ad);
             for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_alng_cyclic(arrvec, i, j, ad);
           }
           this->mem->barrier();
         }
-        
+
         virtual void xchng_flux(arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
           this->mem->barrier();
@@ -80,7 +80,7 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[1]) bc->fill_halos_flux(arrvec, k, i);
           for (auto &bc : this->bcs[2]) bc->fill_halos_flux(arrvec, i, j);
         }
-        
+
         virtual void xchng_sgs_div(
           typename parent_t::arr_t &arr,
           const idx_t<3> &range_ijk
@@ -92,9 +92,9 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[2]) bc->fill_halos_sgs_div(arr, range_ijk[0], range_ijk[1]);
           this->mem->barrier();
         }
-        
+
         virtual void xchng_sgs_vctr(arrvec_t<typename parent_t::arr_t> &av,
-                                    const typename parent_t::arr_t &b, 
+                                    const typename parent_t::arr_t &b,
                                     const idx_t<3> &range_ijk
         ) final
         {
@@ -104,7 +104,7 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[2]) bc->fill_halos_sgs_vctr(av, b, range_ijk[0], range_ijk[1]);
           this->mem->barrier();
         }
-        
+
         virtual void xchng_sgs_tnsr_diag(arrvec_t<typename parent_t::arr_t> &av,
                                          const typename parent_t::arr_t &w,
                                          const typename parent_t::arr_t &vip_div,
@@ -117,9 +117,9 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[2]) bc->fill_halos_sgs_tnsr(av, w, vip_div, range_ijk[0], range_ijk[1], this->dijk[2]);
           this->mem->barrier();
         }
-        
+
         virtual void xchng_sgs_tnsr_offdiag(arrvec_t<typename parent_t::arr_t> &av,
-                                            const arrvec_t<typename parent_t::arr_t> &bv, 
+                                            const arrvec_t<typename parent_t::arr_t> &bv,
                                             const idx_t<3> &range_ijk,
                                             const std::array<rng_t, 3> &range_ijkm
         ) final
@@ -159,7 +159,7 @@ namespace libmpdataxx
           if (!cyclic)
           {
             for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[0], range_ijk[2]^ext^1, range_ijk[0]^ext^h);
-  
+
             // without this barrier, there is a race condition when some threads handle subdomains
             // with one gridpoint width, the problem manifests itself, for example, in pbl test
             // TODO: figure out the exact cause and try to avoid this barrier, what about 2D,
@@ -173,7 +173,7 @@ namespace libmpdataxx
 
             for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk[1]^ext^h, range_ijk[2]^ext^1);
             for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk_0__ext_1, range_ijk[1]^ext^h);
-       
+
             for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[2], range_ijk[1]^ext^1, range_ijk[2]^ext^h);
             for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[2], range_ijk[2]^ext^h, range_ijk_0__ext_1);
           }
@@ -184,7 +184,7 @@ namespace libmpdataxx
 
             for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml_cyclic(arrvec[1], range_ijk[1]^ext^h, range_ijk[2]^ext^1);
             for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml_cyclic(arrvec[1], range_ijk_0__ext_1, range_ijk[1]^ext^h);
-       
+
             for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml_cyclic(arrvec[2], range_ijk[1]^ext^1, range_ijk[2]^ext^h);
             for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml_cyclic(arrvec[2], range_ijk[2]^ext^h, range_ijk_0__ext_1);
           }
@@ -216,7 +216,7 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[2]) bc->set_edge_pres(av[2], range_ijk[0], range_ijk[1], sign);
           this->mem->barrier();
         }
-        
+
         virtual void save_edges(
           const arrvec_t<typename parent_t::arr_t> &av,
           const idx_t<3> &range_ijk
@@ -227,18 +227,18 @@ namespace libmpdataxx
           for (auto &bc : this->bcs[2]) bc->save_edge_vel(av[2], range_ijk[0], range_ijk[1]);
           this->mem->barrier();
         }
-        
+
         void hook_ante_loop(const typename parent_t::advance_arg_t nt) // TODO: this nt conflicts in fact with multiple-advance()-call logic!
         {
           parent_t::hook_ante_loop(nt);
- 
+
           // sanity check for non-divergence of the initial Courant number field
           // TODO: same in 1D
           if (!opts::isset(ct_params_t::opts, opts::dfl))
           {
             real_t max_abs_div = max_abs_vctr_div(this->mem->GC);
 
-            if (max_abs_div > this->max_abs_div_eps) 
+            if (max_abs_div > this->max_abs_div_eps)
               throw std::runtime_error("initial advector field is divergent");
           }
         }
@@ -252,7 +252,7 @@ namespace libmpdataxx
                                          ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j, k);
           return this->mem->max(this->rank, stat_field(this->ijk));
         }
-        
+
         real_t max_abs_vctr_div(const arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
           stat_field(this->ijk) =  abs(
@@ -279,17 +279,17 @@ namespace libmpdataxx
         public:
 
         struct ctor_args_t
-        {   
+        {
           // <TODO> these should be common for 1D,2D,3D
           int rank;
           typename parent_t::mem_t *mem;
           // </TODO>
-          typename parent_t::bcp_t 
-            &bcxl, &bcxr, 
+          typename parent_t::bcp_t
+            &bcxl, &bcxr,
             &bcyl, &bcyr,
-            &bczl, &bczr; 
-          const rng_t &i, &j, &k; 
-        };  
+            &bczl, &bczr;
+          const rng_t &i, &j, &k;
+        };
 
         struct rt_params_t : parent_t::rt_params_t
         {
@@ -305,12 +305,12 @@ namespace libmpdataxx
         ) :
           parent_t(
             args.rank,
-            args.mem, 
+            args.mem,
             p,
             idx_t<parent_t::n_dims>({args.i, args.j, args.k})
           ),
-          i(args.i), 
-          j(args.j), 
+          i(args.i),
+          j(args.j),
           k(args.k),
           stat_field(args.mem->tmp[__FILE__][0][0])
         {
@@ -321,14 +321,14 @@ namespace libmpdataxx
           this->set_bcs(0, args.bcxl, args.bcxr);
           this->set_bcs(1, args.bcyl, args.bcyr);
           this->set_bcs(2, args.bczl, args.bczr);
-        } 
+        }
 
         public:
 
         static void alloc(
           typename parent_t::mem_t *mem,
           const int &n_iters
-        )   
+        )
         {
           // psi
           mem->psi.resize(parent_t::n_eqns);
@@ -338,10 +338,10 @@ namespace libmpdataxx
                 parent_t::rng_sclr(mem->grid_size[0]),
                 parent_t::rng_sclr(mem->grid_size[1]),
                 parent_t::rng_sclr(mem->grid_size[2])
-              ))); 
+              )));
 
           // Courant field components (Arakawa-C grid)
-          mem->GC.push_back(mem->old(new typename parent_t::arr_t( 
+          mem->GC.push_back(mem->old(new typename parent_t::arr_t(
             parent_t::rng_vctr(mem->grid_size[0]),
             parent_t::rng_sclr(mem->grid_size[1]),
             parent_t::rng_sclr(mem->grid_size[2])
@@ -363,7 +363,7 @@ namespace libmpdataxx
               opts::isset(ct_params_t::opts, opts::div_3rd_dt))
           {
             // TODO: why for (auto f : {mem->ndt_GC, mem->ndtt_GC}) doesn't work ?
-            mem->ndt_GC.push_back(mem->old(new typename parent_t::arr_t( 
+            mem->ndt_GC.push_back(mem->old(new typename parent_t::arr_t(
               parent_t::rng_vctr(mem->grid_size[0]),
               parent_t::rng_sclr(mem->grid_size[1]),
               parent_t::rng_sclr(mem->grid_size[2])
@@ -378,8 +378,8 @@ namespace libmpdataxx
               parent_t::rng_sclr(mem->grid_size[1]),
               parent_t::rng_vctr(mem->grid_size[2])
             )));
-            
-            mem->ndtt_GC.push_back(mem->old(new typename parent_t::arr_t( 
+
+            mem->ndtt_GC.push_back(mem->old(new typename parent_t::arr_t(
               parent_t::rng_vctr(mem->grid_size[0]),
               parent_t::rng_sclr(mem->grid_size[1]),
               parent_t::rng_sclr(mem->grid_size[2])
@@ -406,16 +406,16 @@ namespace libmpdataxx
 
           // allocate Kahan summation temporary vars
           if (opts::isset(ct_params_t::opts, opts::khn))
-            for (int n = 0; n < 3; ++n) 
-              mem->khn_tmp.push_back(mem->old(new typename parent_t::arr_t( 
-                parent_t::rng_sclr(mem->grid_size[0]), 
+            for (int n = 0; n < 3; ++n)
+              mem->khn_tmp.push_back(mem->old(new typename parent_t::arr_t(
+                parent_t::rng_sclr(mem->grid_size[0]),
                 parent_t::rng_sclr(mem->grid_size[1]),
                 parent_t::rng_sclr(mem->grid_size[2])
               )));
           // courant field
           alloc_tmp_sclr(mem, __FILE__, 1);
-        }  
-        
+        }
+
         // helper method to allocate a temporary space composed of arbitrarily staggered arrays
         static void alloc_tmp_stgr(
           typename parent_t::mem_t *mem,
@@ -434,10 +434,10 @@ namespace libmpdataxx
               srfc ? rng_t(0, 0) :
                 stgr[n][2] ? parent_t::rng_vctr(mem->grid_size[2]) :
                   parent_t::rng_sclr(mem->grid_size[2])
-            ))); 
+            )));
           }
         }
-        
+
         // helper method to allocate a temporary space composed of vector-component arrays
         static void alloc_tmp_vctr(
           typename parent_t::mem_t *mem,
@@ -447,25 +447,25 @@ namespace libmpdataxx
           alloc_tmp_stgr(mem, __file__, 3, {{true, false, false}, {false, true, false}, {false, false, true}});
         }
 
-        // helper method to allocate n_arr scalar temporary arrays 
+        // helper method to allocate n_arr scalar temporary arrays
         static void alloc_tmp_sclr(
           typename parent_t::mem_t *mem,
           const char * __file__, const int n_arr,
           std::string name = "",
           bool srfc = false // allocate only surface data
-        )   
-        {   
+        )
+        {
           mem->tmp[__file__].push_back(new arrvec_t<typename parent_t::arr_t>());
 
           if (!name.empty()) mem->avail_tmp[name] = std::make_pair(__file__, mem->tmp[__file__].size() - 1);
 
           for (int n = 0; n < n_arr; ++n)
-            mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t( 
+            mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t(
               parent_t::rng_sclr(mem->grid_size[0]),
               parent_t::rng_sclr(mem->grid_size[1]),
               srfc ? rng_t(0, 0) : parent_t::rng_sclr(mem->grid_size[2])
             )));
-        } 
+        }
       };
     } // namespace detail
   } // namespace solvers
diff --git a/libmpdata++/solvers/detail/solver_common.hpp b/libmpdata++/solvers/detail/solver_common.hpp
index 1038f289..c70cb54e 100644
--- a/libmpdata++/solvers/detail/solver_common.hpp
+++ b/libmpdata++/solvers/detail/solver_common.hpp
@@ -35,10 +35,10 @@ namespace libmpdataxx
         public:
 
         enum { n_eqns = ct_params_t::n_eqns };
-        enum { halo = minhalo }; 
+        enum { halo = minhalo };
         enum { n_dims = ct_params_t::n_dims };
         enum { n_tlev = n_tlev_ };
-        
+
         using ct_params_t_ = ct_params_t; // propagate ct_params_t mainly for output purposes
         using real_t = typename ct_params_t::real_t;
         typedef blitz::Array<real_t, n_dims> arr_t;
@@ -49,7 +49,7 @@ namespace libmpdataxx
         using advance_arg_t = typename std::conditional<ct_params_t::var_dt, real_t, int>::type;
 
 
-        protected: 
+        protected:
         // TODO: output common doesnt know about ct_params_t
         static constexpr bool var_dt = ct_params_t::var_dt;
 
@@ -70,15 +70,15 @@ namespace libmpdataxx
 
         long long int timestep = 0;
         real_t time = 0;
-        std::vector<int> n; 
+        std::vector<int> n;
 
-        typedef concurr::detail::sharedmem<real_t, n_dims, n_tlev> mem_t; 
+        typedef concurr::detail::sharedmem<real_t, n_dims, n_tlev> mem_t;
         mem_t *mem;
 
         // helper methods invoked by solve()
         virtual void advop(int e) = 0;
 
-        // helper method telling us if equation e is the last one advected assuming increasing order, 
+        // helper method telling us if equation e is the last one advected assuming increasing order,
         // but taking into account possible delay of advection of some equations
         // and assuming that is_last_eqn is not called for delayed equations before it's called for non-delayed equations
         constexpr bool is_last_eqn(int e)
@@ -89,9 +89,9 @@ namespace libmpdataxx
         }
 
         virtual void cycle(int e) final
-        { 
+        {
           n[e] = (n[e] + 1) % n_tlev - n_tlev;  // -n_tlev so that n+1 does not give out of bounds
-          if(is_last_eqn(e)) mem->cycle(rank); 
+          if(is_last_eqn(e)) mem->cycle(rank);
         }
 
         virtual void xchng(int e) = 0;
@@ -103,7 +103,7 @@ namespace libmpdataxx
         {
           // with distributed memory and cyclic boundary conditions,
           // leftmost node must send left first, as
-          // rightmost node is waiting 
+          // rightmost node is waiting
           if (d == 0 && this->mem->distmem.size() > 0 && this->mem->distmem.rank() == 0)
             std::swap(bcl, bcr);
 
@@ -113,10 +113,10 @@ namespace libmpdataxx
 
         virtual real_t courant_number(const arrvec_t<arr_t>&) = 0;
         virtual real_t max_abs_vctr_div(const arrvec_t<arr_t>&) = 0;
-       
+
         // return false if advector does not change in time
         virtual bool calc_gc() {return false;}
-       
+
         // used to calculate nondimensionalised first and second time derivatives of advector
         virtual void calc_ndt_gc() {}
 
@@ -142,7 +142,7 @@ namespace libmpdataxx
                                rank == mem->size - 1 ? rng_t(r.first(), (r + n).last()) :
                                  r;
         }
-        
+
         // thread-aware range extension, variadic version
         template <class n_t, class... ns_t>
         rng_t extend_range(const rng_t &r, const n_t n, const ns_t... ns) const
@@ -151,49 +151,49 @@ namespace libmpdataxx
         }
 
         private:
-      
+
 #if !defined(NDEBUG)
-        bool 
-          hook_ante_step_called         = true, // initially true to handle nt=0 
-          hook_ante_delayed_step_called = true, 
-          hook_post_step_called         = true,  
+        bool
+          hook_ante_step_called         = true, // initially true to handle nt=0
+          hook_ante_delayed_step_called = true,
+          hook_post_step_called         = true,
           hook_ante_loop_called         = true;
 #endif
 
 
         protected:
 
-        virtual void hook_ante_step() 
-        { 
+        virtual void hook_ante_step()
+        {
           // sanity check if all subclasses call their parents' hooks
 #if !defined(NDEBUG)
           hook_ante_step_called = true;
 #endif
         }
 
-        virtual void hook_ante_delayed_step() 
-        { 
+        virtual void hook_ante_delayed_step()
+        {
           // sanity check if all subclasses call their parents' hooks
 #if !defined(NDEBUG)
           hook_ante_delayed_step_called = true;
 #endif
         }
 
-        virtual void hook_post_step() 
+        virtual void hook_post_step()
         {
 #if !defined(NDEBUG)
           hook_post_step_called = true;
 #endif
         }
 
-        virtual void hook_ante_loop(const advance_arg_t nt) 
+        virtual void hook_ante_loop(const advance_arg_t nt)
         {
 #if !defined(NDEBUG)
           hook_ante_loop_called = true;
 #endif
           // fill halos in velocity field
           this->xchng_vctr_alng(mem->GC);
-         
+
           // adaptive timestepping - for constant in time velocity it suffices
           // to change the timestep once and do a simple scaling of advector
           if (ct_params_t::var_dt)
@@ -212,7 +212,7 @@ namespace libmpdataxx
 
         const real_t time_() const { return time;}
 
-        struct rt_params_t 
+        struct rt_params_t
         {
           std::array<int, n_dims> grid_size;
           real_t dt=0, max_abs_div_eps = blitz::epsilon(real_t(44)), max_courant = real_t(0.5);
@@ -220,12 +220,12 @@ namespace libmpdataxx
 
         // ctor
         solver_common(
-          const int &rank, 
-          mem_t *mem, 
-          const rt_params_t &p, 
+          const int &rank,
+          mem_t *mem,
+          const rt_params_t &p,
           const decltype(ijk) &ijk
         ) :
-          rank(rank), 
+          rank(rank),
           dt_stash{},
           dt(p.dt),
           di(0),
@@ -233,7 +233,7 @@ namespace libmpdataxx
           dk(0),
           max_abs_div_eps(p.max_abs_div_eps),
           max_courant(p.max_courant),
-          n(n_eqns, 0), 
+          n(n_eqns, 0),
           mem(mem),
           ijk(ijk)
         {
@@ -242,7 +242,7 @@ namespace libmpdataxx
 
           // run-time sanity checks
           for (int d = 0; d < n_dims; ++d)
-            if (p.grid_size[d] < 1) 
+            if (p.grid_size[d] < 1)
               throw std::runtime_error("bogus grid size");
         }
 
@@ -252,9 +252,9 @@ namespace libmpdataxx
 #if defined(USE_MPI)
           // finalize mpi if it was initialized by distmem,
           // otherwise it would break programs that instantiate many solvers;
-          // TODO: MPI standard requires that the same thread that called mpi_init 
+          // TODO: MPI standard requires that the same thread that called mpi_init
           // calls mpi_finalize, we don't ensure it
-          if(!libmpdataxx::concurr::detail::mpi_initialized_before && rank==0) 
+          if(!libmpdataxx::concurr::detail::mpi_initialized_before && rank==0)
             MPI::Finalize();
 #endif
 #if !defined(NDEBUG)
@@ -266,12 +266,12 @@ namespace libmpdataxx
         }
 
         virtual void solve(advance_arg_t nt) final
-        {   
+        {
           // multiple calls to sovlve() are meant to advance the solution by nt
           // TODO: does it really work with var_dt ? we do not advance by time exactly ...
           nt += ct_params_t::var_dt ? time : timestep;
 
-          // being generous about out-of-loop barriers 
+          // being generous about out-of-loop barriers
           if (timestep == 0)
           {
             mem->barrier();
@@ -291,7 +291,7 @@ namespace libmpdataxx
           // higher-order temporal interpolation for output requires doing a few additional steps
           int additional_steps = ct_params_t::out_intrp_ord;
           while (ct_params_t::var_dt ? (time < nt || additional_steps > 0) : timestep < nt)
-          {   
+          {
             // progress-bar info through thread name (check top -H)
             monitor(float(ct_params_t::var_dt ? time : timestep) / nt);  // TODO: does this value make sanse with repeated advence() calls?
 
@@ -300,7 +300,7 @@ namespace libmpdataxx
             if (mem->panic) break;
 
             // proper solver stuff
-            
+
             // for variable in time velocity calculate advector at n+1/2, returns false if
             // velocity does not change in time
             bool var_gc = calc_gc();
@@ -312,7 +312,7 @@ namespace libmpdataxx
               real_t cfl = courant_number(mem->GC);
               if (cfl > 0)
               {
-                do 
+                do
                 {
                   dt *= max_courant / cfl;
                   calc_gc();
@@ -321,11 +321,11 @@ namespace libmpdataxx
                 while (cfl > max_courant);
               }
             }
-            
+
             // once we set the time step
             // for third-order MPDATA we need to calculate time derivatives of the advector field
             if (var_gc && div3_mpdata) calc_ndt_gc();
-            
+
             hook_ante_step();
 
             for (int e = 0; e < n_eqns; ++e)
@@ -349,7 +349,7 @@ namespace libmpdataxx
             hook_post_step();
 
             if (time >= nt) additional_steps--;
-          }   
+          }
 
           mem->barrier();
           // note: hook_post_loop was removed as conficling with multiple-advance()-call logic
@@ -358,7 +358,7 @@ namespace libmpdataxx
         protected:
 
         // psi[n] getter - just to shorten the code
-        // note that e.g. in hook_post_loop it points rather to 
+        // note that e.g. in hook_post_loop it points rather to
         // psi^{n+1} than psi^{n} (hence not using the name psi_n)
         virtual arr_t &state(const int &e) final
         {
@@ -379,7 +379,7 @@ namespace libmpdataxx
 
       template<typename ct_params_t, int n_tlev, int minhalo, class enableif = void>
       class solver
-      {}; 
+      {};
     } // namespace detail
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/mpdata.hpp b/libmpdata++/solvers/mpdata.hpp
index aec7b161..1e19bde3 100644
--- a/libmpdata++/solvers/mpdata.hpp
+++ b/libmpdata++/solvers/mpdata.hpp
@@ -6,13 +6,13 @@
 
 #pragma once
 
-#include <libmpdata++/solvers/detail/mpdata_osc_1d.hpp> 
-#include <libmpdata++/solvers/detail/mpdata_osc_2d.hpp> 
-#include <libmpdata++/solvers/detail/mpdata_osc_3d.hpp> 
+#include <libmpdata++/solvers/detail/mpdata_osc_1d.hpp>
+#include <libmpdata++/solvers/detail/mpdata_osc_2d.hpp>
+#include <libmpdata++/solvers/detail/mpdata_osc_3d.hpp>
 
-#include <libmpdata++/solvers/detail/mpdata_fct_1d.hpp> 
-#include <libmpdata++/solvers/detail/mpdata_fct_2d.hpp> 
-#include <libmpdata++/solvers/detail/mpdata_fct_3d.hpp> 
+#include <libmpdata++/solvers/detail/mpdata_fct_1d.hpp>
+#include <libmpdata++/solvers/detail/mpdata_fct_2d.hpp>
+#include <libmpdata++/solvers/detail/mpdata_fct_3d.hpp>
 
 namespace libmpdataxx
 {
@@ -21,18 +21,18 @@ namespace libmpdataxx
     struct mpdata_family_tag {};
 
     // the mpdata class
-    template<typename ct_params_t, int minhalo = 0, class enableif = void> 
+    template<typename ct_params_t, int minhalo = 0, class enableif = void>
     class mpdata
     {};
 
     // oscillatory version
     template<typename ct_params_t, int minhalo>
     class mpdata<
-      ct_params_t, minhalo, 
+      ct_params_t, minhalo,
       typename std::enable_if<!opts::isset(ct_params_t::opts, opts::fct)>::type
     > : public detail::mpdata_osc<ct_params_t, minhalo>
     {
-      using parent_t = detail::mpdata_osc<ct_params_t, minhalo>; 
+      using parent_t = detail::mpdata_osc<ct_params_t, minhalo>;
       using parent_t::parent_t; // inheriting constructors
 
       protected:
@@ -46,9 +46,9 @@ namespace libmpdataxx
       typename std::enable_if<opts::isset(ct_params_t::opts, opts::fct)>::type
     > : public detail::mpdata_fct<ct_params_t, minhalo>
     {
-      using parent_t = detail::mpdata_fct<ct_params_t, minhalo>; 
+      using parent_t = detail::mpdata_fct<ct_params_t, minhalo>;
       using parent_t::parent_t; // inheriting constructors
-      
+
       protected:
       using solver_family = mpdata_family_tag;
     };
diff --git a/libmpdata++/solvers/mpdata_rhs.hpp b/libmpdata++/solvers/mpdata_rhs.hpp
index bb035d55..351d7c43 100644
--- a/libmpdata++/solvers/mpdata_rhs.hpp
+++ b/libmpdata++/solvers/mpdata_rhs.hpp
@@ -1,10 +1,10 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
  *
- * @brief improved Euler inhomogeneous solver  
+ * @brief improved Euler inhomogeneous solver
  *        (cf. eq. 32 in Smolarkiewicz 1998) // TODO cite
  */
 
@@ -16,11 +16,11 @@ namespace libmpdataxx
 {
   namespace solvers
   {
-    enum rhs_scheme_t 
-    { 
+    enum rhs_scheme_t
+    {
       euler_a, // Euler's method, Eulerian spirit:        psi^n+1 = ADV(psi^n) + R^n
       euler_b, // Euler's method, semi-Lagrangian spirit: psi^n+1 = ADV(psi^n + R^n)
-      trapez,  // paraphrase of trapezoidal rule:         psi^n+1 = ADV(psi^n + 1/2 * R^n) + 1/2 * R^n+1 
+      trapez,  // paraphrase of trapezoidal rule:         psi^n+1 = ADV(psi^n + 1/2 * R^n) + 1/2 * R^n+1
       mixed    // allows implementation of arbitrary mixed explicit/implicit schemes
     };
 
@@ -30,7 +30,7 @@ namespace libmpdataxx
       {trapez , "trapez" },
       {mixed  , "mixed"  }
     };
-    
+
     struct mpdata_rhs_family_tag {};
 
     template <class ct_params_t, int minhalo = 0>
@@ -51,17 +51,17 @@ namespace libmpdataxx
       arrvec_t<typename parent_t::arr_t> &rhs;
 
       virtual void update_rhs(
-        arrvec_t<typename parent_t::arr_t> &rhs, 
+        arrvec_t<typename parent_t::arr_t> &rhs,
         const typename parent_t::real_t &dt,
         const int &at
-      ) 
+      )
       {
         assert(at == n || at == n+1);
 #if !defined(NDEBUG)
         update_rhs_called = true;
 #endif
         // zero-out all rhs arrays
-        for (int e = 0; e < parent_t::n_eqns; ++e) 
+        for (int e = 0; e < parent_t::n_eqns; ++e)
         {
           // do nothing for equations with no rhs
           if (opts::isset(ct_params_t::hint_norhs, opts::bit(e))) continue;
@@ -78,7 +78,7 @@ namespace libmpdataxx
         const typename parent_t::real_t &dt_arg
       ) final
       {
-        for (int e = 0; e < parent_t::n_eqns; ++e) 
+        for (int e = 0; e < parent_t::n_eqns; ++e)
         {
           // do nothing for equations with no rhs
           if (opts::isset(ct_params_t::hint_norhs, opts::bit(e))) continue;
@@ -89,13 +89,13 @@ namespace libmpdataxx
       }
 
       public:
-      
+
       // ctor
       mpdata_rhs(
-        typename parent_t::ctor_args_t args, 
+        typename parent_t::ctor_args_t args,
         const typename parent_t::rt_params_t &p
       ) :
-        parent_t(args, p), 
+        parent_t(args, p),
         rhs(args.mem->tmp[__FILE__][0])
       {
         assert(this->dt != 0);
@@ -130,8 +130,8 @@ namespace libmpdataxx
 
         switch ((rhs_scheme_t)ct_params_t::rhs_scheme)
         {
-          case rhs_scheme_t::euler_a: 
-          case rhs_scheme_t::euler_b: 
+          case rhs_scheme_t::euler_a:
+          case rhs_scheme_t::euler_b:
             break;
           case rhs_scheme_t::trapez:
             update_rhs(rhs, this->dt / 2, n);
@@ -139,7 +139,7 @@ namespace libmpdataxx
           case rhs_scheme_t::mixed:
             hook_mixed_rhs_ante_loop();
             break;
-          default: 
+          default:
             assert(false);
         }
       }
@@ -154,20 +154,20 @@ namespace libmpdataxx
 
         switch ((rhs_scheme_t)ct_params_t::rhs_scheme)
         {
-          case rhs_scheme_t::euler_a: 
+          case rhs_scheme_t::euler_a:
             update_rhs(rhs, this->dt, n);
             break;
-          case rhs_scheme_t::euler_b: 
+          case rhs_scheme_t::euler_b:
             update_rhs(rhs, this->dt, n);
-            apply_rhs(this->dt); 
+            apply_rhs(this->dt);
             break;
-          case rhs_scheme_t::trapez: 
-            apply_rhs(this->dt / 2); 
+          case rhs_scheme_t::trapez:
+            apply_rhs(this->dt / 2);
             break;
-          case rhs_scheme_t::mixed: 
+          case rhs_scheme_t::mixed:
             hook_mixed_rhs_ante_step();
             break;
-          default: 
+          default:
             assert(false);
         }
       }
@@ -177,25 +177,25 @@ namespace libmpdataxx
         parent_t::hook_post_step();
         switch ((rhs_scheme_t)ct_params_t::rhs_scheme)
         {
-          case rhs_scheme_t::euler_a: 
+          case rhs_scheme_t::euler_a:
             apply_rhs(this->dt);
             break;
-          case rhs_scheme_t::euler_b: 
+          case rhs_scheme_t::euler_b:
             break;
-          case rhs_scheme_t::trapez: 
+          case rhs_scheme_t::trapez:
             update_rhs(rhs, this->dt / 2, n+1);
             apply_rhs(this->dt / 2);
             break;
-          case rhs_scheme_t::mixed: 
+          case rhs_scheme_t::mixed:
             hook_mixed_rhs_post_step();
             break;
           default:
             assert(false);
         }
-      } 
+      }
 
       static void alloc(
-        typename parent_t::mem_t *mem, 
+        typename parent_t::mem_t *mem,
         const int &n_iters
       ) {
         // TODO: optimise to skip allocs for equations with no rhs
diff --git a/libmpdata++/solvers/mpdata_rhs_vip.hpp b/libmpdata++/solvers/mpdata_rhs_vip.hpp
index 04ad2d54..a037049e 100644
--- a/libmpdata++/solvers/mpdata_rhs_vip.hpp
+++ b/libmpdata++/solvers/mpdata_rhs_vip.hpp
@@ -1,4 +1,4 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
@@ -16,11 +16,11 @@ namespace libmpdataxx
 
     // to be specialised
     template <typename ct_params_t, int minhalo = 0, class enableif = void>
-    class mpdata_rhs_vip 
+    class mpdata_rhs_vip
     {};
 
     // 1D version
-    template <class ct_params_t, int minhalo> 
+    template <class ct_params_t, int minhalo>
     class mpdata_rhs_vip<
       ct_params_t, minhalo,
       typename std::enable_if<ct_params_t::n_dims == 1>::type
@@ -45,12 +45,12 @@ namespace libmpdataxx
         if (!this->mem->G)
         {
           dst[0](im + h) = this->dt / this->di * real_t(.5) * (
-            src[0](im    ) + 
+            src[0](im    ) +
             src[0](im + 1)
           );
-        } 
+        }
         else
-        { 
+        {
           assert(false); // TODO: and if G is not const...
         }
         this->xchng_vctr_alng(dst, /*ad*/ false, /*cyclic*/ true);
@@ -58,8 +58,8 @@ namespace libmpdataxx
 
       void extrapolate_in_time() final
       {
-        this->extrp(0, ix::vip_i);     
-        this->xchng_sclr(this->vip_stash(0)[0]);      // filling halos 
+        this->extrp(0, ix::vip_i);
+        this->xchng_sclr(this->vip_stash(0)[0]);      // filling halos
       }
 
       public:
@@ -68,18 +68,18 @@ namespace libmpdataxx
       mpdata_rhs_vip(
         typename parent_t::ctor_args_t args,
         const typename parent_t::rt_params_t &p
-      ) : 
+      ) :
         parent_t(args, p),
         im(args.i.first() - 1, args.i.last())
       {
         assert(this->di != 0);
 
         this->vip_ixs = {ix::vip_i};
-      } 
+      }
     };
 
     // 2D version
-    template <class ct_params_t, int minhalo> 
+    template <class ct_params_t, int minhalo>
     class mpdata_rhs_vip<
       ct_params_t, minhalo,
       typename std::enable_if<ct_params_t::n_dims == 2>::type
@@ -95,34 +95,34 @@ namespace libmpdataxx
       // member fields
       const rng_t im, jm;
 
-      template<int d, class arr_t> 
+      template<int d, class arr_t>
       void intrp(
         arr_t &dst,
         const arr_t &src,
-        const rng_t &i, 
-        const rng_t &j, 
-        const typename ct_params_t::real_t &di 
+        const rng_t &i,
+        const rng_t &j,
+        const typename ct_params_t::real_t &di
       )
-      {   
+      {
         using idxperm::pi;
         using namespace arakawa_c;
         using real_t = typename ct_params_t::real_t;
-  
+
         if (!this->mem->G)
         {
           dst(pi<d>(i+h,j)) = this->dt / di * real_t(.5) * (
-            src(pi<d>(i,    j)) + 
+            src(pi<d>(i,    j)) +
             src(pi<d>(i + 1,j))
           );
-        } 
+        }
         else
-        { 
+        {
           dst(pi<d>(i+h,j)) = this->dt / di * real_t(.5) * (
-            (*this->mem->G)(pi<d>(i,    j)) * src(pi<d>(i,    j)) + 
+            (*this->mem->G)(pi<d>(i,    j)) * src(pi<d>(i,    j)) +
             (*this->mem->G)(pi<d>(i + 1,j)) * src(pi<d>(i + 1,j))
           );
         }
-      }  
+      }
 
       void interpolate_in_space(arrvec_t<typename parent_t::arr_t> &dst,
                                 const arrvec_t<typename parent_t::arr_t> &src) final
@@ -137,7 +137,7 @@ namespace libmpdataxx
 
       void extrapolate_in_time() final
       {
-        using namespace libmpdataxx::arakawa_c; 
+        using namespace libmpdataxx::arakawa_c;
 
         this->extrp(0, ix::vip_i);
         // using xchng_pres because bcs have to be consistent with those used in
@@ -149,24 +149,24 @@ namespace libmpdataxx
       }
 
       public:
-      
+
       // ctor
       mpdata_rhs_vip(
         typename parent_t::ctor_args_t args,
         const typename parent_t::rt_params_t &p
-      ) : 
+      ) :
         parent_t(args, p),
         im(args.i.first() - 1, args.i.last()),
         jm(args.j.first() - 1, args.j.last())
       {
         assert(this->di != 0);
         assert(this->dj != 0);
-        
+
         this->vip_ixs = {ix::vip_i, ix::vip_j};
       }
-      
+
       static void alloc(
-        typename parent_t::mem_t *mem, 
+        typename parent_t::mem_t *mem,
         const int &n_iters
       ) {
         parent_t::alloc(mem, n_iters);
@@ -178,7 +178,7 @@ namespace libmpdataxx
                   parent_t::rng_sclr(mem->grid_size[0]),
                   parent_t::rng_sclr(mem->grid_size[1])
           )));
-          
+
           for (int n = 0; n < ct_params_t::n_dims; ++n)
             mem->vab_relax.push_back(mem->old(new typename parent_t::arr_t(
                     parent_t::rng_sclr(mem->grid_size[0]),
@@ -187,9 +187,9 @@ namespace libmpdataxx
         }
       }
     };
-    
+
     // 3D version
-    template <class ct_params_t, int minhalo> 
+    template <class ct_params_t, int minhalo>
     class mpdata_rhs_vip<
       ct_params_t, minhalo,
       typename std::enable_if<ct_params_t::n_dims == 3>::type
@@ -198,42 +198,42 @@ namespace libmpdataxx
       using ix = typename ct_params_t::ix;
 
       protected:
-      
+
       using solver_family = mpdata_rhs_vip_family_tag;
       using parent_t = detail::mpdata_rhs_vip_common<ct_params_t, minhalo>;
 
       // member fields
       const rng_t im, jm, km;
 
-      template<int d, class arr_t> 
+      template<int d, class arr_t>
       void intrp(
         arr_t &dst,
         const arr_t &src,
-        const rng_t &i, 
-        const rng_t &j, 
-        const rng_t &k, 
-        const typename ct_params_t::real_t &di 
+        const rng_t &i,
+        const rng_t &j,
+        const rng_t &k,
+        const typename ct_params_t::real_t &di
       )
-      {   
+      {
         using idxperm::pi;
         using namespace arakawa_c;
         using real_t = typename ct_params_t::real_t;
-  
+
         if (!this->mem->G)
         {
           dst(pi<d>(i+h, j, k)) = this->dt / di * real_t(.5) * (
-            src(pi<d>(i,     j, k)) + 
+            src(pi<d>(i,     j, k)) +
             src(pi<d>(i + 1, j, k))
           );
-        } 
+        }
         else
-        { 
+        {
           dst(pi<d>(i+h, j, k)) = this->dt / di * real_t(.5) * (
-            (*this->mem->G)(pi<d>(i  , j, k)) * src(pi<d>(i,   j, k)) + 
+            (*this->mem->G)(pi<d>(i  , j, k)) * src(pi<d>(i,   j, k)) +
             (*this->mem->G)(pi<d>(i+1, j, k)) * src(pi<d>(i+1, j, k))
           );
         }
-      }  
+      }
 
       void interpolate_in_space(arrvec_t<typename parent_t::arr_t> &dst,
                                 const arrvec_t<typename parent_t::arr_t> &src) final
@@ -249,12 +249,12 @@ namespace libmpdataxx
 
       void extrapolate_in_time() final
       {
-        using namespace libmpdataxx::arakawa_c; 
+        using namespace libmpdataxx::arakawa_c;
 
         // using xchng_pres because bcs have to be consistent with those used in
         // pressure solver to obtain non-divergent advector field
         auto ex = this->halo - 1;
-        this->extrp(0, ix::vip_i);     
+        this->extrp(0, ix::vip_i);
         this->extrp(1, ix::vip_j);
         this->extrp(2, ix::vip_k);
         this->xchng_pres(this->vip_stash(0)[0], this->ijk, ex);
@@ -263,11 +263,11 @@ namespace libmpdataxx
       }
 
       public:
-      
+
       static void alloc(
-        typename parent_t::mem_t *mem, 
+        typename parent_t::mem_t *mem,
         const int &n_iters
-      ) 
+      )
       {
         parent_t::alloc(mem, n_iters);
 
@@ -279,7 +279,7 @@ namespace libmpdataxx
                   parent_t::rng_sclr(mem->grid_size[1]),
                   parent_t::rng_sclr(mem->grid_size[2])
           )));
-          
+
           for (int n = 0; n < ct_params_t::n_dims; ++n)
             mem->vab_relax.push_back(mem->old(new typename parent_t::arr_t(
                     parent_t::rng_sclr(mem->grid_size[0]),
@@ -293,7 +293,7 @@ namespace libmpdataxx
       mpdata_rhs_vip(
         typename parent_t::ctor_args_t args,
         const typename parent_t::rt_params_t &p
-      ) : 
+      ) :
         parent_t(args, p),
         im(args.i.first() - 1, args.i.last()),
         jm(args.j.first() - 1, args.j.last()),
@@ -302,9 +302,9 @@ namespace libmpdataxx
         assert(this->di != 0);
         assert(this->dj != 0);
         assert(this->dk != 0);
-        
+
         this->vip_ixs = {ix::vip_i, ix::vip_j, ix::vip_k};
-      } 
-    }; 
+      }
+    };
   } // namespace solvers
 } // namespace libmpdataxx
diff --git a/libmpdata++/solvers/mpdata_rhs_vip_prs.hpp b/libmpdata++/solvers/mpdata_rhs_vip_prs.hpp
index a08b6ced..52e1f9d1 100644
--- a/libmpdata++/solvers/mpdata_rhs_vip_prs.hpp
+++ b/libmpdata++/solvers/mpdata_rhs_vip_prs.hpp
@@ -6,16 +6,16 @@
 
 #pragma once
 
-#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_gcrk.hpp> 
-#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_mr.hpp> 
-#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_pc.hpp> 
+#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_gcrk.hpp>
+#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_mr.hpp>
+#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_pc.hpp>
 
 namespace libmpdataxx
 {
   namespace solvers
   {
-    enum prs_scheme_t 
-    {   
+    enum prs_scheme_t
+    {
       mr, // minimal residual
       cr, // conjugate residual
       gcrk, // generalized conjugate residual (restarted after k steps)
@@ -32,7 +32,7 @@ namespace libmpdataxx
     struct mpdata_rhs_vip_prs_family_tag {};
 
     // the mpdata class
-    template<typename ct_params_t, int minhalo = 0, class enableif = void> 
+    template<typename ct_params_t, int minhalo = 0, class enableif = void>
     class mpdata_rhs_vip_prs
     {
       static_assert(!std::is_void<enableif>::value, "please specify pressure scheme type !");
@@ -45,7 +45,7 @@ namespace libmpdataxx
       typename std::enable_if<(int)ct_params_t::prs_scheme == (int)mr>::type
     > : public detail::mpdata_rhs_vip_prs_mr<ct_params_t, minhalo>
     {
-      using parent_t = detail::mpdata_rhs_vip_prs_mr<ct_params_t, minhalo>; 
+      using parent_t = detail::mpdata_rhs_vip_prs_mr<ct_params_t, minhalo>;
       using parent_t::parent_t; // inheriting constructors
 
       protected:
@@ -59,13 +59,13 @@ namespace libmpdataxx
       typename std::enable_if<(int)ct_params_t::prs_scheme == (int)cr>::type
     > : public detail::mpdata_rhs_vip_prs_gcrk<ct_params_t, 1, minhalo>
     {
-      using parent_t = detail::mpdata_rhs_vip_prs_gcrk<ct_params_t, 1, minhalo>; 
+      using parent_t = detail::mpdata_rhs_vip_prs_gcrk<ct_params_t, 1, minhalo>;
       using parent_t::parent_t; // inheriting constructors
-      
+
       protected:
       using solver_family = mpdata_rhs_vip_prs_family_tag;
     };
-    
+
     // generalized conjugate residual
     template<typename ct_params_t, int minhalo>
     class mpdata_rhs_vip_prs<
@@ -73,9 +73,9 @@ namespace libmpdataxx
       typename std::enable_if<(int)ct_params_t::prs_scheme == (int)gcrk>::type
     > : public detail::mpdata_rhs_vip_prs_gcrk<ct_params_t, ct_params_t::prs_k_iters, minhalo>
     {
-      using parent_t = detail::mpdata_rhs_vip_prs_gcrk<ct_params_t, ct_params_t::prs_k_iters, minhalo>; 
+      using parent_t = detail::mpdata_rhs_vip_prs_gcrk<ct_params_t, ct_params_t::prs_k_iters, minhalo>;
       using parent_t::parent_t; // inheriting constructors
-      
+
       protected:
       using solver_family = mpdata_rhs_vip_prs_family_tag;
     };
@@ -87,9 +87,9 @@ namespace libmpdataxx
       typename std::enable_if<(int)ct_params_t::prs_scheme == (int)pc>::type
     > : public detail::mpdata_rhs_vip_prs_pc<ct_params_t, minhalo>
     {
-      using parent_t = detail::mpdata_rhs_vip_prs_pc<ct_params_t, minhalo>; 
+      using parent_t = detail::mpdata_rhs_vip_prs_pc<ct_params_t, minhalo>;
       using parent_t::parent_t; // inheriting constructors
-      
+
       protected:
       using solver_family = mpdata_rhs_vip_prs_family_tag;
     };
diff --git a/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs.hpp b/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs.hpp
index 93459cb4..fba000d7 100644
--- a/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs.hpp
+++ b/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs.hpp
@@ -33,7 +33,7 @@ namespace libmpdataxx
     struct mpdata_rhs_vip_prs_sgs_dns_family_tag {};
     struct mpdata_rhs_vip_prs_sgs_smg_family_tag {};
 
-    template<typename ct_params_t, int minhalo = 0, class enableif = void> 
+    template<typename ct_params_t, int minhalo = 0, class enableif = void>
     class mpdata_rhs_vip_prs_sgs;
 
     template<typename ct_params_t, int minhalo>
@@ -42,32 +42,32 @@ namespace libmpdataxx
       typename std::enable_if<(int)ct_params_t::sgs_scheme == (int)iles>::type
     > : public mpdata_rhs_vip_prs<ct_params_t, minhalo>
     {
-      using parent_t = mpdata_rhs_vip_prs<ct_params_t, minhalo>; 
+      using parent_t = mpdata_rhs_vip_prs<ct_params_t, minhalo>;
       using parent_t::parent_t; // inheriting constructors
     };
-    
+
     template <class ct_params_t, int minhalo>
     class mpdata_rhs_vip_prs_sgs<
       ct_params_t, minhalo,
       typename std::enable_if<(int)ct_params_t::sgs_scheme == (int)dns>::type
     > : public detail::mpdata_rhs_vip_prs_sgs_dns<ct_params_t, minhalo>
     {
-      using parent_t = detail::mpdata_rhs_vip_prs_sgs_dns<ct_params_t, minhalo>; 
+      using parent_t = detail::mpdata_rhs_vip_prs_sgs_dns<ct_params_t, minhalo>;
       using parent_t::parent_t; // inheriting constructors
 
       protected:
       using solver_family = mpdata_rhs_vip_prs_sgs_dns_family_tag;
     };
-    
+
     template<typename ct_params_t, int minhalo>
     class mpdata_rhs_vip_prs_sgs<
       ct_params_t, minhalo,
       typename std::enable_if<(int)ct_params_t::sgs_scheme == (int)smg>::type
     > : public detail::mpdata_rhs_vip_prs_sgs_smg<ct_params_t, minhalo>
     {
-      using parent_t = detail::mpdata_rhs_vip_prs_sgs_smg<ct_params_t, minhalo>; 
+      using parent_t = detail::mpdata_rhs_vip_prs_sgs_smg<ct_params_t, minhalo>;
       using parent_t::parent_t; // inheriting constructors
-      
+
       protected:
       using solver_family = mpdata_rhs_vip_prs_sgs_smg_family_tag;
     };
diff --git a/libmpdata++/solvers/shallow_water.hpp b/libmpdata++/solvers/shallow_water.hpp
index 464f635e..84a0173b 100644
--- a/libmpdata++/solvers/shallow_water.hpp
+++ b/libmpdata++/solvers/shallow_water.hpp
@@ -1,11 +1,11 @@
-/** 
+/**
  * @file
  * @copyright University of Warsaw
  * @section LICENSE
  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
  */
 
-#include <libmpdata++/solvers/mpdata_rhs_vip.hpp> 
+#include <libmpdata++/solvers/mpdata_rhs_vip.hpp>
 #include <libmpdata++/formulae/nabla_formulae.hpp>
 
 /** @brief the 2D shallow-water equations system
@@ -22,7 +22,7 @@
   * - \f$ \eta_0(x,y) \f$ - bathymetry
   * - \f$ h = \eta - \eta_0 \f$ - thickness of the fluid layer
   * - \f$ \vec{u} = (u,v) \f$
-  * - \f$ \nabla_z = (\partial_x, \partial_y) \f$ 
+  * - \f$ \nabla_z = (\partial_x, \partial_y) \f$
   *
   * momentum equation:
   * \f$ \partial_t u + u \cdot \nabla_z u = - \frac{1}{\rho} \nabla_z p \f$
@@ -36,12 +36,12 @@
   * h times momentum eq. plus u times mass continuity equation:
   * \f$ \partial_t (uh) + \nabla_z (u \cdot uh) = -g h \nabla_z \eta \f$
   */
-namespace libmpdataxx 
+namespace libmpdataxx
 {
   namespace solvers
   {
     template <typename ct_params_t, class enableif = void>
-    class shallow_water 
+    class shallow_water
     {};
 
     namespace detail
@@ -56,7 +56,7 @@ namespace libmpdataxx
         // member fields
         const typename ct_params_t::real_t g;
 
-        // 
+        //
         void update_rhs(
           libmpdataxx::arrvec_t<typename parent_t::arr_t> &rhs,
           const typename parent_t::real_t &dt,
@@ -65,7 +65,7 @@ namespace libmpdataxx
           parent_t::update_rhs(rhs, dt, at);
           enum { n = 0 };    // just to make n, n+1 look nice :)
           assert(
-            this->timestep == 0 && at == n 
+            this->timestep == 0 && at == n
             ||
             this->timestep  > 0 && at == n+1
           ); // note: we know only how to calculate R^{n+1}
@@ -76,29 +76,29 @@ namespace libmpdataxx
         void hook_post_step()
         {
           parent_t::hook_post_step();
-          assert(min(this->state(ct_params_t::ix::h)(this->ijk)) >= 0);  
+          assert(min(this->state(ct_params_t::ix::h)(this->ijk)) >= 0);
         }
 
         void hook_ante_step()
         {
           parent_t::hook_ante_step();
-          assert(min(this->state(ct_params_t::ix::h)(this->ijk)) >= 0);  
+          assert(min(this->state(ct_params_t::ix::h)(this->ijk)) >= 0);
         }
 
         public:
 
         // run-time parameters
-        struct rt_params_t : parent_t::rt_params_t 
-        {   
-          typename parent_t::real_t g = 9.81; // default value 
+        struct rt_params_t : parent_t::rt_params_t
+        {
+          typename parent_t::real_t g = 9.81; // default value
         };
 
         // ctor
-        shallow_water_common( 
-          typename parent_t::ctor_args_t args, 
+        shallow_water_common(
+          typename parent_t::ctor_args_t args,
           const rt_params_t &p
         ) :
-          parent_t(args, p), 
+          parent_t(args, p),
           g(p.g)
         {}
       };
@@ -107,7 +107,7 @@ namespace libmpdataxx
     // 1D version
     template <typename ct_params_t>
     class shallow_water<
-      ct_params_t, 
+      ct_params_t,
       typename std::enable_if<ct_params_t::n_dims == 1>::type
     > : public detail::shallow_water_common<ct_params_t>
     {
@@ -127,17 +127,17 @@ namespace libmpdataxx
 
         parent_t::update_rhs(rhs, dt, at);
 
-        rhs.at(ix::qx)(this->i) -= 
-          this->g 
-          * this->state(ix::h)(this->i) 
-          * grad(this->state(ix::h), this->i, this->di); 
+        rhs.at(ix::qx)(this->i) -=
+          this->g
+          * this->state(ix::h)(this->i)
+          * grad(this->state(ix::h), this->i, this->di);
       }
     };
 
     // 2D version
     template <typename ct_params_t>
     class shallow_water<
-      ct_params_t, 
+      ct_params_t,
       typename std::enable_if<ct_params_t::n_dims == 2>::type
     > : public detail::shallow_water_common<ct_params_t>
     {
@@ -155,9 +155,9 @@ namespace libmpdataxx
       )
       {
         using namespace libmpdataxx::formulae::nabla;
-        rhs(pi<d>(i,j)) -= this->g * this->state(ix::h)(pi<d>(i,j)) * grad<d>(this->state(ix::h), i, j, di); 
+        rhs(pi<d>(i,j)) -= this->g * this->state(ix::h)(pi<d>(i,j)) * grad<d>(this->state(ix::h), i, j, di);
       }
- 
+
       /// @brief Shallow Water Equations: Momentum forcings for the X and Y coordinates
       void update_rhs(
         libmpdataxx::arrvec_t<typename parent_t::arr_t> &rhs,
@@ -169,7 +169,7 @@ namespace libmpdataxx
 
         //
         forcings_helper<0>(rhs.at(ix::qx), this->i, this->j, this->di);
-        forcings_helper<1>(rhs.at(ix::qy), this->j, this->i, this->dj); 
+        forcings_helper<1>(rhs.at(ix::qy), this->j, this->i, this->dj);
       }
     };
   } // namespace solvers