diff --git a/examples/CSCKokkos.cpp b/examples/CSCKokkos.cpp
index e46c0ed6..f3f95beb 100644
--- a/examples/CSCKokkos.cpp
+++ b/examples/CSCKokkos.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -98,21 +98,21 @@ int main(int argc, char* argv[])
 
         int loc_total = 0;
         loc_total += 0; // Get rid of warning
-        REDUCE_SUM(i, 0, nnz,
-            loc_total, {
+        FOR_REDUCE_SUM(i, 0, nnz,
+                       loc_total, {
                 loc_total += values[i];
         }, total);
         printf("Sum of nnz from pointer method %d\n", total);
         total = 0;
-        REDUCE_SUM(i, 0, nnz,
-            loc_total, {
+        FOR_REDUCE_SUM(i, 0, nnz,
+                       loc_total, {
                 loc_total += a_start[i];
         }, total);
         printf("Sum of start indices form .get_starts() %d\n", total);
         total = 0;
 
-        REDUCE_SUM(i, 0, dim1,
-                    j, 0, dim2 - 1,
+        FOR_REDUCE_SUM(i, 0, dim1,
+                       j, 0, dim2 - 1,
             loc_total, {
                 loc_total += A(i, j);
         }, total);
diff --git a/examples/CSRKokkos.cpp b/examples/CSRKokkos.cpp
index 2bcb3499..7dab444d 100644
--- a/examples/CSRKokkos.cpp
+++ b/examples/CSRKokkos.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -96,22 +96,22 @@ int main(int argc, char* argv[])
             printf("And has %ld non zero elements\n", A.nnz());
         });
 
-        REDUCE_SUM(i, 0, nnz,
-                    loc_total, {
-                        loc_total += res[i];
+        FOR_REDUCE_SUM(i, 0, nnz,
+                       loc_total, {
+                loc_total += res[i];
                         }, total);
         printf("Sum of nnz from pointer method %d\n", total);
         total = 0;
-        REDUCE_SUM(i, 0, nnz,
-                    loc_total, {
-                        loc_total += a_start[i];
+        FOR_REDUCE_SUM(i, 0, nnz,
+                       loc_total, {
+                loc_total += a_start[i];
                         }, total);
         printf("Sum of start indices form .get_starts() %d\n", total);
        total = 0;
-        REDUCE_SUM(i, 0, dim1,
-                   j, 0, dim2,
-                    loc_total, {
-                        loc_total += A(i,j);
+        FOR_REDUCE_SUM(i, 0, dim1,
+                       j, 0, dim2,
+                       loc_total, {
+                loc_total += A(i,j);
                         }, total);
         printf("Sum of nnz in array notation %d\n", total);
         auto ss = A.begin(0);
diff --git a/examples/kokkos_for.cpp b/examples/kokkos_for.cpp
index 35185191..64bce3fc 100644
--- a/examples/kokkos_for.cpp
+++ b/examples/kokkos_for.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -60,24 +60,24 @@ int main(int argc, char* argv[])
     // Kokkos::View<int ***> arr_3D("ARR_3D", 10,10,10);
     CArrayKokkos<int> arr_3D(10, 10, 10);
     FOR_ALL(i, 0, 10,
-         j, 0, 10,
-         k, 0, 10, {
+            j, 0, 10,
+            k, 0, 10, {
         arr_3D(i, j, k) = k * 10 * 10 + j * 10 + i;
     });
 
     int loc_sum = 0;
     int result  = 0;
-    REDUCE_SUM(i, 0, 10,
-        loc_sum, {
+    FOR_REDUCE_SUM(i, 0, 10,
+                   loc_sum, {
         loc_sum += arr(i) * arr(i);
     }, result);
     printf("1D reduce sum: %i vs. 985960\n", result);
 
     loc_sum = 0;
     result  = 0;
-    REDUCE_SUM(i, 0, 10,
-               j, 0, 10,
-                loc_sum, {
+    FOR_REDUCE_SUM(i, 0, 10,
+                   j, 0, 10,
+                   loc_sum, {
             loc_sum += arr_2D(i, j) * arr_2D(i, j);
     }, result);
 
@@ -85,10 +85,10 @@ int main(int argc, char* argv[])
 
     loc_sum = 0;
     result  = 0;
-    REDUCE_SUM(i, 0, 10,
-           j, 0, 10,
-           k, 0, 10,
-           loc_sum, {
+    FOR_REDUCE_SUM(i, 0, 10,
+                   j, 0, 10,
+                   k, 0, 10,
+                   loc_sum, {
                 loc_sum += arr_3D(i, j, k) * arr_3D(i, j, k);
     }, result);
 
@@ -96,10 +96,10 @@ int main(int argc, char* argv[])
 
     result = 0;
     int loc_max = 2000;
-    REDUCE_MAX(i, 0, 10,
-           j, 0, 10,
-           k, 0, 10,
-           loc_max, {
+    FOR_REDUCE_MAX(i, 0, 10,
+                   j, 0, 10,
+                   k, 0, 10,
+                   loc_max, {
         if (loc_max < arr_3D(i, j, k)) {
             loc_max = arr_3D(i, j, k);
         }
@@ -122,10 +122,10 @@ int main(int argc, char* argv[])
 
     result = 0;
     int loc_min = 2000;
-    REDUCE_MIN(i, 0, 10,
-           j, 0, 10,
-           k, 0, 10,
-           loc_min, {
+    FOR_REDUCE_MIN(i, 0, 10,
+                   j, 0, 10,
+                   k, 0, 10,
+                   loc_min, {
         if (loc_min > arr_3D(i, j, k)) {
             loc_min = arr_3D(i, j, k);
         }
@@ -171,7 +171,7 @@ int main(int argc, char* argv[])
     }); // end parallel do
 
     DO_REDUCE_MAX(i, 1, 10,
-           loc_max, {
+                  loc_max, {
         if (loc_max < matrix1D(i)) {
             loc_max = matrix1D(i);
         }
@@ -180,8 +180,8 @@ int main(int argc, char* argv[])
     printf("result max 1D matrix = %i\n", result);
 
     DO_REDUCE_MAX(j, 1, 10,
-              i, 1, 10,
-              loc_max, {
+                  i, 1, 10,
+                  loc_max, {
         if (loc_max < matrix2D(i, j)) {
             loc_max = matrix2D(i, j);
         }
@@ -189,9 +189,9 @@ int main(int argc, char* argv[])
     printf("result max 2D matrix = %i\n", result);
 
     DO_REDUCE_MAX(k, 1, 10,
-              j, 1, 10,
-              i, 1, 10,
-              loc_max, {
+                  j, 1, 10,
+                  i, 1, 10,
+                  loc_max, {
         if (loc_max < matrix3D(i, j, k)) {
             loc_max = matrix3D(i, j, k);
         }
@@ -199,7 +199,7 @@ int main(int argc, char* argv[])
     printf("result max 3D matrix = %i\n", result);
 
     DO_REDUCE_MIN(i, 1, 10,
-           loc_min, {
+                  loc_min, {
         if (loc_min > matrix1D(i)) {
             loc_min = matrix1D(i);
         }
@@ -207,8 +207,8 @@ int main(int argc, char* argv[])
     printf("result min 1D matrix = %i\n", result);
 
     DO_REDUCE_MIN(j, 1, 10,
-              i, 1, 10,
-              loc_min, {
+                  i, 1, 10,
+                  loc_min, {
         if (loc_min > matrix2D(i, j)) {
             loc_min = matrix2D(i, j);
         }
@@ -216,9 +216,9 @@ int main(int argc, char* argv[])
     printf("result min 2D matrix = %i\n", result);
 
     DO_REDUCE_MIN(k, 1, 10,
-              j, 1, 10,
-              i, 1, 10,
-              loc_min, {
+                  j, 1, 10,
+                  i, 1, 10,
+                  loc_min, {
         if (loc_min > matrix3D(i, j, k)) {
             loc_min = matrix3D(i, j, k);
         }
diff --git a/examples/laplaceMPI/laplace_mpi.cpp b/examples/laplaceMPI/laplace_mpi.cpp
index b5ddfc87..3cb407f3 100644
--- a/examples/laplaceMPI/laplace_mpi.cpp
+++ b/examples/laplaceMPI/laplace_mpi.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -292,9 +292,9 @@ int main(int argc, char* argv[])
 
             // calculate max difference between temperature and temperature_previous
             double loc_max_value = 100.0;
-            REDUCE_MAX(i, height_index_start, height_index_end,
-               j, 1, width_loc - 1,
-               loc_max_value, {
+            FOR_REDUCE_MAX(i, height_index_start, height_index_end,
+                           j, 1, width_loc - 1,
+                           loc_max_value, {
                 double value = fabs(temperature_loc(i, j) - temperature_previous_loc(i, j));
                 if (value > loc_max_value) {
                     loc_max_value = value;
diff --git a/examples/main_kokkos.cpp b/examples/main_kokkos.cpp
index cb67bb9c..6eda2072 100644
--- a/examples/main_kokkos.cpp
+++ b/examples/main_kokkos.cpp
@@ -146,10 +146,10 @@ class ModelA
 
         // NOTE: if private vars are accessed, requires REDUCE_SUM_CLASS
         // do summation in parallel on GPU
-        REDUCE_SUM_CLASS(k, 1, 6,
-                         j, 1, 5,
-                         i, 1, 4,
-                         loc_sum, {
+        FOR_REDUCE_SUM_CLASS(k, 1, 6,
+                             j, 1, 5,
+                             i, 1, 4,
+                             loc_sum, {
             loc_sum += matrix(i, j, k, 1);
         }, val);
 
@@ -182,10 +182,10 @@ class ModelB
 
         // NOTE: if private vars are accessed, requires REDUCE_SUM_CLASS
         // do summation in parallel on GPU
-        REDUCE_SUM_CLASS(k, 1, 6,
-                         j, 1, 5,
-                         i, 1, 4,
-                         loc_sum, {
+        FOR_REDUCE_SUM_CLASS(k, 1, 6,
+                             j, 1, 5,
+                             i, 1, 4,
+                             loc_sum, {
             loc_sum += matrix(i, j, k, 1);
         }, val);
 
@@ -960,10 +960,10 @@ void pass_by_ref_two(const FMatrixKokkos<int>& matrix)
     int val = 0;
 
     // do summation in parallel on GPU
-    REDUCE_SUM(k, 1, 6,
-               j, 1, 5,
-               i, 1, 4,
-               loc_sum, {
+    FOR_REDUCE_SUM(k, 1, 6,
+                   j, 1, 5,
+                   i, 1, 4,
+                   loc_sum, {
         loc_sum += matrix(i, j, k, 1);
     }, val);
 
diff --git a/examples/mtr-kokkos-simple.cpp b/examples/mtr-kokkos-simple.cpp
index b6a9d11b..1690cb4c 100644
--- a/examples/mtr-kokkos-simple.cpp
+++ b/examples/mtr-kokkos-simple.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- Â© 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -134,11 +134,11 @@ int main(int argc, char *argv[]) {
 
 
         FMatrixDevice <real_t> matrix3D;        // declare variable and allocate sizes and dimensions later
-        matrix3D = FMatrixDevice <real_t> (10,10,10); // allocate dimensions and sizes 
+        matrix3D = FMatrixDevice <real_t> (10,10,10, "mat3d"); // allocate dimensions and sizes 
 
         // Array example following the Fortran index convention,
         // indicies go from 0 to less than N, first index varies the fastest
-        FArrayDevice <int> arr3D(10,10,10);
+        FArrayDevice <int> arr3D(10,10,10, "arr3d");
 
 
         // Initialize matrix1D
@@ -182,19 +182,19 @@ int main(int argc, char *argv[]) {
         // ===============
 
 
-        int N=20;  // array dimensions are NxN
+        int N=200;  // array dimensions are NxN
 
         // A 2D array example following the C index convention
         // indicies go from 0 to less than N, last index varies the fastest
-        CArrayDevice <double> A(N,N); // dense array
-        CArrayDevice <double> B(N,N);
-        CArrayDevice <double> C(N,N);
-        CArrayDevice <double> D(N,N);
+        CArrayDevice <double> A(N,N, "A"); // dense array
+        CArrayDevice <double> B(N,N, "B");
+        CArrayDevice <double> C(N,N, "C");
+        CArrayDevice <double> D(N,N, "D");
 
-        CArrayDevice <double> L(N,N); // lower triangular array
-        CArrayDevice <double> U(N,N); // upper triangular array
-        CArrayDevice <double> x(N);
-        CArrayDevice <double> y(N);
+        CArrayDevice <double> L(N,N, "L"); // lower triangular array
+        CArrayDevice <double> U(N,N, "U"); // upper triangular array
+        CArrayDevice <double> x(N, "x");
+        CArrayDevice <double> y(N, "y");
 
         
 
@@ -249,7 +249,10 @@ int main(int argc, char *argv[]) {
         }); // end parallel for
 
         // backwards substitution
-        for (int k = N-1; k>=0; k--){
+        //for (int k = N-1; k>=0; k--){
+	    FOR_FIRST(id, 0, N,{
+	    
+	        int k = ((N-1) - id);  // make it count backwards
 
             x(k) = y(k);
             
@@ -257,31 +260,32 @@ int main(int argc, char *argv[]) {
             int result;
             // calculate dot product
             if(k<N-1){
-                REDUCE_SUM(i, k, N-1,
-                           loc_sum, {
+                FOR_REDUCE_SUM_SECOND(i, k, N-1,
+                               loc_sum, {
                         loc_sum += U(k,i)*x(i);
                 }, result);
             } // end if
             x(k) -= result;
             x(k) /= U(k,k);
-        } // end for k backwards
+        }); // end for k backwards
 
 
         // forward substitution
-        for (int i = 0; i<N; i++){
+        //for (int i = 0; i<N; i++){
+	    FOR_FIRST(i, 0, N,{
 
             int loc_sum;
             int result;
             // calculate dot product
             if(i-1>0){
-                REDUCE_SUM(j, 0, i-1,
+                FOR_REDUCE_SUM_SECOND(j, 0, i-1,
                            loc_sum, {
                         loc_sum += L(i,j)*x(j);
                 }, result);
             } // end if
 
             x(i) = (y(i)- result)/U(i,i);
-        } // end for i  
+        }); // end for i  
 
 
 
@@ -321,8 +325,8 @@ int main(int argc, char *argv[]) {
         int length = 20;
 
         // Parallel Jacobi solver for steady 2D heat transfer
-        CArrayDevice <double> Temp(length+2, length+2);
-        CArrayDevice <double> Temp_previous(length+2, length+2);
+        CArrayDual <double> Temp(length+2, length+2, "Temp");
+        CArrayDevice <double> Temp_previous(length+2, length+2, "Temp_old");
 
         // heat source is bottom right corner of mesh, T=100 in that corner
         // temperature of left wall is T_cold=0.
@@ -423,10 +427,14 @@ int main(int argc, char *argv[]) {
 
         printf("\n");
         printf("Temperature profile\n");
-        // print temperature result
+	
+	// copy values to the CPU, if on a GPU
+	Temp.update_host();
+	
+        // print temperature result on CPU
         for(int i=length+1; i>=0; i--){
             for (int j=0; j<=length+1; j++){
-                printf(" %5.2f ", Temp(i,j));
+                printf(" %5.2f ", Temp.host(i,j));
             } // for j
             printf("\n");
         }; // for i
diff --git a/examples/phaseField/srcMacros/local_free_energy.cpp b/examples/phaseField/srcMacros/local_free_energy.cpp
index a26ad265..ea76cc0a 100644
--- a/examples/phaseField/srcMacros/local_free_energy.cpp
+++ b/examples/phaseField/srcMacros/local_free_energy.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -49,10 +49,10 @@ double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArray
     //
     double total_energy = 0.0;
     double loc_sum = 0.0;
-    REDUCE_SUM(i, 1, nx - 1,
-               j, 1, ny - 1,
-               k, 1, nz - 1,
-               loc_sum, {
+    FOR_REDUCE_SUM(i, 1, nx - 1,
+                   j, 1, ny - 1,
+                   k, 1, nz - 1,
+                   loc_sum, {
         // central difference spatial derivative of comp
         double dcdx = (comp(i + 1, j, k) - comp(i - 1, j, k)) / (2.0 * dx);
         double dcdy = (comp(i, j + 1, k) - comp(i, j - 1, k)) / (2.0 * dy);
diff --git a/examples/phaseField/srcMacros/outputs.cpp b/examples/phaseField/srcMacros/outputs.cpp
index a323f3cf..6f863953 100644
--- a/examples/phaseField/srcMacros/outputs.cpp
+++ b/examples/phaseField/srcMacros/outputs.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -47,20 +47,20 @@ void track_progress(int iter, int* nn, DCArrayKokkos<double>& comp)
     // sum of comp field
     double sum_comp = 0.0;
     double loc_sum  = 0.0;
-    REDUCE_SUM(i, 0, nx,
-               j, 0, ny,
-               k, 0, nz,
-               loc_sum, {
+    FOR_REDUCE_SUM(i, 0, nx,
+                   j, 0, ny,
+                   k, 0, nz,
+                   loc_sum, {
         loc_sum += comp(i, j, k);
                }, sum_comp);
 
     // max of comp field
     double max_comp;
     double loc_max;
-    REDUCE_MAX(i, 0, nx,
-               j, 0, ny,
-               k, 0, nz,
-               loc_max, {
+    FOR_REDUCE_MAX(i, 0, nx,
+                   j, 0, ny,
+                   k, 0, nz,
+                   loc_max, {
         if (loc_max < comp(i, j, k)) {
             loc_max = comp(i, j, k);
         }
@@ -70,10 +70,10 @@ void track_progress(int iter, int* nn, DCArrayKokkos<double>& comp)
     // min of comp field
     double min_comp;
     double loc_min;
-    REDUCE_MIN(i, 0, nx,
-               j, 0, ny,
-               k, 0, nz,
-               loc_min, {
+    FOR_REDUCE_MIN(i, 0, nx,
+                   j, 0, ny,
+                   k, 0, nz,
+                   loc_min, {
         if (loc_min > comp(i, j, k)) {
             loc_min = comp(i, j, k);
         }
diff --git a/examples/phaseFieldMPI/system.cpp b/examples/phaseFieldMPI/system.cpp
index 7c3f940c..7967b7be 100644
--- a/examples/phaseFieldMPI/system.cpp
+++ b/examples/phaseFieldMPI/system.cpp
@@ -137,7 +137,7 @@ double System::calculate_total_free_energy()
 
 #if 0
     // bulk free energy + interfacial energy
-    REDUCE_SUM(k, 1, ga.comp.dims(0) - 1,
+    FOR_REDUCE_SUM(k, 1, ga.comp.dims(0) - 1,
                j, 1, ga.comp.dims(1) - 1,
                i, 1, ga.comp.dims(2) - 1,
                loc_sum, {
@@ -152,7 +152,7 @@ double System::calculate_total_free_energy()
 #endif
 
     // bulk free energy only
-    REDUCE_SUM(k, 0, ga.comp.dims(0),
+    FOR_REDUCE_SUM(k, 0, ga.comp.dims(0),
                j, 0, ga.comp.dims(1),
                i, 0, ga.comp.dims(2),
                loc_sum, {
@@ -200,7 +200,7 @@ void System::track_progress(int iter)
     // sum of comp field
     double sum_comp = 0.0;
     double loc_sum  = 0.0;
-    REDUCE_SUM(k, 0, ga.comp.dims(0),
+    FOR_REDUCE_SUM(k, 0, ga.comp.dims(0),
                j, 0, ga.comp.dims(1),
                i, 0, ga.comp.dims(2),
                loc_sum, {
@@ -210,7 +210,7 @@ void System::track_progress(int iter)
     // max of comp field
     double max_comp;
     double loc_max;
-    REDUCE_MAX(k, 0, ga.comp.dims(0),
+    FOR_REDUCE_MAX(k, 0, ga.comp.dims(0),
                j, 0, ga.comp.dims(1),
                i, 0, ga.comp.dims(2),
                loc_max, {
@@ -222,7 +222,7 @@ void System::track_progress(int iter)
     // min of comp field
     double min_comp;
     double loc_min;
-    REDUCE_MIN(k, 0, ga.comp.dims(0),
+    FOR_REDUCE_MIN(k, 0, ga.comp.dims(0),
                j, 0, ga.comp.dims(1),
                i, 0, ga.comp.dims(2),
                loc_min, {
diff --git a/examples/sparsetests/powerIter.cpp b/examples/sparsetests/powerIter.cpp
index 270be239..66877ddc 100644
--- a/examples/sparsetests/powerIter.cpp
+++ b/examples/sparsetests/powerIter.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -72,7 +72,7 @@ void renorm(CArrayKokkos<double>& b)
     double loc_total = 0;
     int    n = b.dims(0);
     int    i = 0;
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
                 loc_total, { loc_total += b(i) * b(i); }
         , total);
     total = 1 / sqrt(total);
@@ -98,7 +98,7 @@ double innerProd(CArrayKokkos<double>& a, CArrayKokkos<double>& b)
     double total     = 0;
     double loc_total = 0;
     int    n = b.dims(0);
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
         loc_total, { 
             loc_total += a(i) * b(i); 
         }, total);
@@ -110,7 +110,7 @@ double l1Change(CArrayKokkos<double>& a, CArrayKokkos<double>& b)
     double total     = 0;
     double loc_total = 0;
     int    n = b.dims(0);
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
         loc_total, { 
             loc_total += abs(a(i) - b(i)); 
         }, total);
@@ -151,7 +151,7 @@ void renormSp(CArrayKokkos<double>& b)
     double loc_total = 0;
     int    n = b.dims(0);
     int    i = 0;
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
                 loc_total, { loc_total += b(i) * b(i); }
         , total);
     total = 1 / sqrt(total);
@@ -177,7 +177,7 @@ double innerProdSp(CArrayKokkos<double>& a, CArrayKokkos<double>& b)
     double total     = 0;
     double loc_total = 0;
     int    n = b.dims(0);
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
         loc_total, { 
             loc_total += a(i) * b(i); 
         }, total);
@@ -189,7 +189,7 @@ double l1ChangeSp(CArrayKokkos<double>& a, CArrayKokkos<double>& b)
     double total     = 0;
     double loc_total = 0;
     int    n = b.dims(0);
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
         loc_total, { 
             loc_total += abs(a(i) - b(i)); 
         }, total);
diff --git a/examples/sparsetests/sparsePowerIter.cpp b/examples/sparsetests/sparsePowerIter.cpp
index 5d066f2b..72be7bc9 100644
--- a/examples/sparsetests/sparsePowerIter.cpp
+++ b/examples/sparsetests/sparsePowerIter.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -61,7 +61,7 @@ void renormSp(CArrayKokkos<double>& b)
     double loc_total = 0;
     int    n = b.dims(0);
     int    i = 0;
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
                 loc_total, { loc_total += b(i) * b(i); }
         , total);
     total = 1 / sqrt(total);
@@ -87,7 +87,7 @@ double innerProdSp(CArrayKokkos<double>& a, CArrayKokkos<double>& b)
     double total     = 0;
     double loc_total = 0;
     int    n = b.dims(0);
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
         loc_total, { 
             loc_total += a(i) * b(i); 
         }, total);
@@ -99,7 +99,7 @@ double l1ChangeSp(CArrayKokkos<double>& a, CArrayKokkos<double>& b)
     double total     = 0;
     double loc_total = 0;
     int    n = b.dims(0);
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
         loc_total, { 
             loc_total += abs(a(i) - b(i)); 
         }, total);
diff --git a/examples/test_for.cpp b/examples/test_for.cpp
index a4acab89..4aac408e 100644
--- a/examples/test_for.cpp
+++ b/examples/test_for.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -66,7 +66,7 @@ int main()
 
     int loc_sum = 0;
     int result  = 0;
-    REDUCE_SUM(i, 0, 10,
+    FOR_REDUCE_SUM(i, 0, 10,
                loc_sum, {
         loc_sum += arr(i) * arr(i);
                }, result);
@@ -80,9 +80,9 @@ int main()
 
     loc_sum = 0;
     result  = 0;
-    REDUCE_SUM(i, 0, 10,
-               j, 0, 10,
-               loc_sum, {
+    FOR_REDUCE_SUM(i, 0, 10,
+                   j, 0, 10,
+                   loc_sum, {
         loc_sum += arr_2D(i, j) * arr_2D(i, j);
     }, result);
 
@@ -97,10 +97,10 @@ int main()
 
     loc_sum = 0;
     result  = 0;
-    REDUCE_SUM(i, 0, 10,
-               j, 0, 10,
-               k, 0, 10,
-               loc_sum, {
+    FOR_REDUCE_SUM(i, 0, 10,
+                   j, 0, 10,
+                   k, 0, 10,
+                   loc_sum, {
         loc_sum += arr_3D(i, j, k) * arr_3D(i, j, k);
     }, result);
 
@@ -116,29 +116,29 @@ int main()
     std::cout << "3D reduce : " << result << " vs. " << loc_sum << " \n";
 
     int loc_max;
-    REDUCE_MAX(i, 0, 10,
-               j, 0, 10,
-               k, 0, 10,
-               loc_max, {
+    FOR_REDUCE_MAX(i, 0, 10,
+                   j, 0, 10,
+                   k, 0, 10,
+                   loc_max, {
         loc_max = std::max<int>(arr_3D(i, j, k), loc_max);
     }, result);
 
     std::cout << "3D reduce MAX : " << result << " \n";
 
     int loc_min;
-    REDUCE_MIN(i, 0, 10,
-               j, 0, 10,
-               k, 0, 10,
-               loc_min, {
+    FOR_REDUCE_MIN(i, 0, 10,
+                   j, 0, 10,
+                   k, 0, 10,
+                   loc_min, {
         loc_min = std::min<int>(arr_3D(i, j, k), loc_min);
     }, result);
 
     std::cout << "3D reduce MIN : " << result << " \n";
 
-    REDUCE_MIN_CLASS(i, 0, 10,
-               j, 0, 10,
-               k, 0, 10,
-               loc_min, {
+    FOR_REDUCE_MIN_CLASS(i, 0, 10,
+                         j, 0, 10,
+                         k, 0, 10,
+                         loc_min, {
         loc_min = std::min<int>(arr_3D(i, j, k), loc_min);
     }, result);
 
diff --git a/examples/watt-graph/kokkos_floyd.cpp b/examples/watt-graph/kokkos_floyd.cpp
index 16f8d409..6f8dfb5f 100644
--- a/examples/watt-graph/kokkos_floyd.cpp
+++ b/examples/watt-graph/kokkos_floyd.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************************************
- © 2020. Triad National Security, LLC. All rights reserved.
+ © 2020. Triad National Security, LLC. All rights reserved.
  This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
  National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
  Department of Energy/National Nuclear Security Administration. All rights in the program are
@@ -109,7 +109,7 @@ double averageDistance(CArrayKokkos<float> G, int n)
 {
     double total = 0;
     double loc_sum;
-    REDUCE_SUM(i, 0, n,
+    FOR_REDUCE_SUM(i, 0, n,
                j, 0, n,
                loc_sum, {
         loc_sum += ((double) G(i, j)) / n;
diff --git a/src/include/kokkos_types.h b/src/include/kokkos_types.h
index 3a923b36..b7e1ef05 100644
--- a/src/include/kokkos_types.h
+++ b/src/include/kokkos_types.h
@@ -6729,7 +6729,7 @@ class DynamicArrayKokkos {
  
     void push_back(T value);
  
-    void pop();
+    void pop_back();
  
     // Methods returns the raw pointer (most likely GPU) of the Kokkos View
     KOKKOS_INLINE_FUNCTION
@@ -7040,7 +7040,7 @@ size_t DynamicArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::order() const {
 }
 
 template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
-void DynamicArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::pop() {
+void DynamicArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::pop_back() {
     dims_actual_size_[0]--;
 }
 
@@ -7175,7 +7175,7 @@ class DynamicMatrixKokkos {
  
     void push_back(T value);
  
-    void pop();
+    void pop_back();
  
     // Methods returns the raw pointer (most likely GPU) of the Kokkos View
     KOKKOS_INLINE_FUNCTION
@@ -7486,7 +7486,7 @@ size_t DynamicMatrixKokkos<T,Layout,ExecSpace,MemoryTraits>::order() const {
 }
 
 template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
-void DynamicMatrixKokkos<T,Layout,ExecSpace,MemoryTraits>::pop() {
+void DynamicMatrixKokkos<T,Layout,ExecSpace,MemoryTraits>::pop_back() {
     dims_actual_size_[0]--;
 }
 
diff --git a/src/include/macros.h b/src/include/macros.h
index 3fafe5f8..9baf643f 100644
--- a/src/include/macros.h
+++ b/src/include/macros.h
@@ -194,7 +194,7 @@
             (result) )
 
 #define \
-    REDUCE_SUM(...) \
+    FOR_REDUCE_SUM(...) \
     GET_MACRO(__VA_ARGS__, _13, RSUM3D, _11, _10, RSUM2D, _8, _7, RSUM1D)(__VA_ARGS__)
 
 
@@ -246,7 +246,7 @@
                         Kokkos::Max< decltype(result) > ( (result) ) )
 
 #define \
-    REDUCE_MAX(...) \
+    FOR_REDUCE_MAX(...) \
     GET_MACRO(__VA_ARGS__, _13, RMAX3D, _11, _10, RMAX2D, _8, _7, RMAX1D)(__VA_ARGS__)
 
 
@@ -301,7 +301,7 @@
                         Kokkos::Min< decltype(result) >(result) )
 
 #define \
-    REDUCE_MIN(...) \
+    FOR_REDUCE_MIN(...) \
     GET_MACRO(__VA_ARGS__, _13, RMIN3D, _11, _10, RMIN2D, _8, _7, RMIN1D)(__VA_ARGS__)
 
 
@@ -377,7 +377,7 @@ Kokkos::parallel_reduce( \
                         (result) )
 
 #define \
-REDUCE_SUM_CLASS(...) \
+FOR_REDUCE_SUM_CLASS(...) \
 GET_MACRO(__VA_ARGS__, _13, RSUMCLASS3D, _11, _10, RSUMCLASS2D, _8, _7, RSUMCLASS1D)(__VA_ARGS__)
 
 
@@ -406,7 +406,7 @@ Kokkos::parallel_reduce( \
                         Kokkos::Max< decltype(result) > ( (result) ) )
 
 #define \
-REDUCE_MAX_CLASS(...) \
+FOR_REDUCE_MAX_CLASS(...) \
 GET_MACRO(__VA_ARGS__, _13, RMAXCLASS3D, _11, _10, RMAXCLASS2D, _8, _7, RMAXCLASS1D)(__VA_ARGS__)
 
 
@@ -433,7 +433,7 @@ Kokkos::parallel_reduce( \
                         Kokkos::Min< decltype(result) >(result) )
 
 #define \
-REDUCE_MIN_CLASS(...) \
+FOR_REDUCE_MIN_CLASS(...) \
 GET_MACRO(__VA_ARGS__, _13, RMINCLASS3D, _11, _10, RMINCLASS2D, _8, _7, RMINCLASS1D)(__VA_ARGS__)
 
 #define \
@@ -889,7 +889,7 @@ void reduce_max (int i_start, int i_end,
                 (result) )
 
 #define \
-    REDUCE_SUM(...) \
+    FOR_REDUCE_SUM(...) \
     GET_MACRO(__VA_ARGS__, _13, RSUM3D, _11, _10, RSUM2D, _8, _7, RSUM1D)(__VA_ARGS__)
 
 
@@ -933,7 +933,7 @@ void reduce_max (int i_start, int i_end,
                 (result) )
 
 #define \
-    REDUCE_MAX(...) \
+    FOR_REDUCE_MAX(...) \
     GET_MACRO(__VA_ARGS__, _13, RMAX3D, _11, _10, RMAX2D, _8, _7, RMAX1D)(__VA_ARGS__)
 
 
@@ -977,7 +977,7 @@ void reduce_max (int i_start, int i_end,
                 (result) )
 
 #define \
-    REDUCE_MIN(...) \
+    FOR_REDUCE_MIN(...) \
     GET_MACRO(__VA_ARGS__, _13, RMIN3D, _11, _10, RMIN2D, _8, _7, RMIN1D)(__VA_ARGS__)