diff --git a/examples/CSCKokkos.cpp b/examples/CSCKokkos.cpp index 5d26453d..e46c0ed6 100644 --- a/examples/CSCKokkos.cpp +++ b/examples/CSCKokkos.cpp @@ -1,95 +1,122 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include using namespace mtr; // matar namespace - -int main(int argc, char* argv[]){ - - Kokkos::initialize(); { - size_t nnz = 6 ; - size_t dim1 = 3; - size_t dim2 = 10; - CArrayKokkos starts(dim2 + 1); - CArrayKokkos rows(nnz); - CArrayKokkos array(nnz + 1); - RUN ({ - starts(1) = 1; - starts(2) = 2; - starts(3) = 3; - starts(4) = 4; - starts(5) = 5; - starts(6) = 6; - starts(7) = 6; - starts(8) = 6; - starts(9) = 6; +int main(int argc, char* argv[]) +{ + Kokkos::initialize(); { + size_t nnz = 6; + size_t dim1 = 3; + size_t dim2 = 10; + CArrayKokkos starts(dim2 + 1); + CArrayKokkos rows(nnz); + CArrayKokkos array(nnz + 1); + RUN({ + starts(1) = 1; + starts(2) = 2; + starts(3) = 3; + starts(4) = 4; + starts(5) = 5; + starts(6) = 6; + starts(7) = 6; + starts(8) = 6; + starts(9) = 6; - rows(0) = 0; - rows(1) = 0; - rows(2) = 1; - rows(3) = 1; - rows(4) = 2; - rows(5) = 2; + rows(0) = 0; + rows(1) = 0; + rows(2) = 1; + rows(3) = 1; + rows(4) = 2; + rows(5) = 2; - array(0) = 1; - array(1) = 2; - array(2) = 3; - array(3) = 4; - array(4) = 5; - array(5) = 6; - array(6) = 0; - - }); + array(0) = 1; + array(1) = 2; + array(2) = 3; + array(3) = 4; + array(4) = 5; + array(5) = 6; + array(6) = 0; + }); - /* - |1 2 2 0 0 0 0 0 0 0| - |0 0 3 4 0 0 0 0 0 0| - |0 0 0 0 5 6 0 0 0 0| - */ - - const std::string s = "hello"; - // Testing = op - auto pre_A = CSCArrayKokkos(array, starts,rows, dim1, dim2, s); - auto A = pre_A; - int* values = A.pointer(); - auto a_start = A.get_starts(); - int total = 0; - - RUN ({ - printf("This matix is %ld x %ld \n" , A.dim1(), A.dim2()); - }); + /* + |1 2 2 0 0 0 0 0 0 0| + |0 0 3 4 0 0 0 0 0 0| + |0 0 0 0 5 6 0 0 0 0| + */ - RUN ({ - printf("nnz : %ld \n", A.nnz()); - }); + const std::string s = "hello"; + // Testing = op + auto pre_A = CSCArrayKokkos(array, starts, rows, dim1, dim2, s); + auto A = pre_A; + int* values = A.pointer(); + auto a_start = A.get_starts(); + int total = 0; + RUN({ + printf("This matix is %ld x %ld \n", A.dim1(), A.dim2()); + }); - int loc_total = 0; - loc_total += 0; // Get rid of warning - REDUCE_SUM(i, 0, nnz, - loc_total, { - loc_total += values[i]; - }, total); - printf("Sum of nnz from pointer method %d\n", total); - total = 0; - REDUCE_SUM(i, 0, nnz, - loc_total, { - loc_total += a_start[i]; - }, total); - printf("Sum of start indices form .get_starts() %d\n", total); - total = 0; - - REDUCE_SUM(i, 0, dim1, - j, 0, dim2-1, - loc_total, { - loc_total += A(i,j); - }, total); - printf("Sum of nnz in array notation %d\n", total); - - } Kokkos::finalize(); - return 0; + RUN({ + printf("nnz : %ld \n", A.nnz()); + }); + + int loc_total = 0; + loc_total += 0; // Get rid of warning + REDUCE_SUM(i, 0, nnz, + loc_total, { + loc_total += values[i]; + }, total); + printf("Sum of nnz from pointer method %d\n", total); + total = 0; + REDUCE_SUM(i, 0, nnz, + loc_total, { + loc_total += a_start[i]; + }, total); + printf("Sum of start indices form .get_starts() %d\n", total); + total = 0; - + REDUCE_SUM(i, 0, dim1, + j, 0, dim2 - 1, + loc_total, { + loc_total += A(i, j); + }, total); + printf("Sum of nnz in array notation %d\n", total); + } Kokkos::finalize(); + return 0; } diff --git a/examples/CSRKokkos.cpp b/examples/CSRKokkos.cpp index 338476b7..2bcb3499 100644 --- a/examples/CSRKokkos.cpp +++ b/examples/CSRKokkos.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,87 +38,84 @@ using namespace mtr; // matar namespace -int main(int argc, char* argv[]){ - - Kokkos::initialize(); { - size_t nnz = 6 ; - size_t dim1 = 3; - size_t dim2 = 10; - CArrayKokkos starts(dim1 + 1); - CArrayKokkos columns(nnz); - CArrayKokkos array(nnz); - RUN ({ - for(int i =0; i < 4; i++){ - starts(i) = 2*i; - for(int j = 0; j < 2; j++){ - columns(2*i + j) = i + j; - array(2*i + j) = 2*i + j ; +int main(int argc, char* argv[]) +{ + Kokkos::initialize(); { + size_t nnz = 6; + size_t dim1 = 3; + size_t dim2 = 10; + CArrayKokkos starts(dim1 + 1); + CArrayKokkos columns(nnz); + CArrayKokkos array(nnz); + RUN({ + for (int i = 0; i < 4; i++) { + starts(i) = 2 * i; + for (int j = 0; j < 2; j++) { + columns(2 * i + j) = i + j; + array(2 * i + j) = 2 * i + j; + } } - } - }); + }); - int column_arr[] = {0, 2, 2, 0, 1, 2}; - CArrayKokkos data(6); - CArrayKokkos row(4); - CArrayKokkos column(6); - RUN ({ - for(size_t i =0; i < 6; i++){ - data(i) = i+1.5; - column(i) = column_arr[i]; - } - row(0) = 0; - row(1) = 2; - row(2) = 3; - row(3) = 6; - }); + int column_arr[] = { 0, 2, 2, 0, 1, 2 }; + CArrayKokkos data(6); + CArrayKokkos row(4); + CArrayKokkos column(6); + RUN({ + for (size_t i = 0; i < 6; i++) { + data(i) = i + 1.5; + column(i) = column_arr[i]; + } + row(0) = 0; + row(1) = 2; + row(2) = 3; + row(3) = 6; + }); - const std::string s = "Example"; - CSRArrayKokkos E( data, row, column, 3, 3, s); - + const std::string s = "Example"; + CSRArrayKokkos E(data, row, column, 3, 3, s); - /* - |1 2 0 0 0 0 0 0 0 0| - |0 0 3 4 0 0 0 0 0 0| - |0 0 0 0 5 6 0 0 0 0| - */ - /*const std::string s = "hello"; - auto pre_A = CSRArrayKokkos(array, starts, columns, dim1, dim2, s); - auto A = pre_A; - - - int* res = A.pointer(); - auto a_start = A.get_starts(); - int total = 0; - int loc_total = 0; - loc_total += 0; //Get rid of warning + /* + |1 2 0 0 0 0 0 0 0 0| + |0 0 3 4 0 0 0 0 0 0| + |0 0 0 0 5 6 0 0 0 0| + */ + /*const std::string s = "hello"; + auto pre_A = CSRArrayKokkos(array, starts, columns, dim1, dim2, s); + auto A = pre_A; - RUN ({ - printf("A is %ld x %ld \n", A.dim1(), A.dim2()); - printf("And has %ld non zero elements\n", A.nnz()); - }); - REDUCE_SUM(i, 0, nnz, - loc_total, { - loc_total += res[i]; - }, total); - printf("Sum of nnz from pointer method %d\n", total); - total = 0; - REDUCE_SUM(i, 0, nnz, - loc_total, { - loc_total += a_start[i]; - }, total); - printf("Sum of start indices form .get_starts() %d\n", total); - total = 0; - REDUCE_SUM(i, 0, dim1, - j, 0, dim2, - loc_total, { - loc_total += A(i,j); - }, total); - printf("Sum of nnz in array notation %d\n", total); - auto ss = A.begin(0); - */ - } Kokkos::finalize(); - return 0; + int* res = A.pointer(); + auto a_start = A.get_starts(); + int total = 0; + int loc_total = 0; + loc_total += 0; //Get rid of warning - + RUN ({ + printf("A is %ld x %ld \n", A.dim1(), A.dim2()); + printf("And has %ld non zero elements\n", A.nnz()); + }); + + REDUCE_SUM(i, 0, nnz, + loc_total, { + loc_total += res[i]; + }, total); + printf("Sum of nnz from pointer method %d\n", total); + total = 0; + REDUCE_SUM(i, 0, nnz, + loc_total, { + loc_total += a_start[i]; + }, total); + printf("Sum of start indices form .get_starts() %d\n", total); + total = 0; + REDUCE_SUM(i, 0, dim1, + j, 0, dim2, + loc_total, { + loc_total += A(i,j); + }, total); + printf("Sum of nnz in array notation %d\n", total); + auto ss = A.begin(0); + */ + } Kokkos::finalize(); + return 0; } diff --git a/examples/halfspace_cooling/halfspace_cooling.cpp b/examples/halfspace_cooling/halfspace_cooling.cpp index ce7719cd..f2c9be96 100644 --- a/examples/halfspace_cooling/halfspace_cooling.cpp +++ b/examples/halfspace_cooling/halfspace_cooling.cpp @@ -1,4 +1,4 @@ -// Populate a dynamic ragged down array with the +// Populate a dynamic ragged down array with the // temperatures from the halfspace cooling model as a function of // age and depth @@ -21,27 +21,26 @@ using namespace mtr; // matar namespace // set up constant parameters -const int max_age = 1000; -const double mantle_temp = 1350.0; +const int max_age = 1000; +const double mantle_temp = 1350.0; const double thermal_diff = 0.000001; -int main() { - - Kokkos::initialize(); +int main() { - // depth will need to be adjusted for larger max ages - // age 2000 Ma, depth 250 - // age 3000 Ma, depth 280 - // age 4000 Ma, depth 320 - int depth = 200; - auto begin = std::chrono::high_resolution_clock::now(); // start clock + Kokkos::initialize(); + { + // depth will need to be adjusted for larger max ages + // age 2000 Ma, depth 250 + // age 3000 Ma, depth 280 + // age 4000 Ma, depth 320 + int depth = 200; + auto begin = std::chrono::high_resolution_clock::now(); // start clock - DynamicRaggedDownArrayKokkos dyn_ragged_down(max_age+1, depth+1); // create array + DynamicRaggedDownArrayKokkos dyn_ragged_down(max_age + 1, depth + 1); // create array - DO_ALL(i, 0, max_age, { + DO_ALL(i, 0, max_age, { for (int j = 0; j <= depth; j++) { - if (i == 0 && j == 0) - { // when depth and age are 0, give mantle_temp + if (i == 0 && j == 0) { // when depth and age are 0, give mantle_temp dyn_ragged_down.stride(j)++; dyn_ragged_down(i, j) = mantle_temp; } @@ -50,20 +49,19 @@ int main() { dyn_ragged_down(i, j) = temp; // check if we have reached the mantle, if yes, move on to next age - if (round(dyn_ragged_down(i, j)) == 1350) - { + if (round(dyn_ragged_down(i, j)) == 1350) { printf("Depth to mantle %d km, age of lithosphere %d Ma \n", j, i); break; } } }); - // Stop counting time and calculate elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end-begin); - - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); -} + // Stop counting time and calculate elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); + } Kokkos::finalize(); return 0; diff --git a/examples/kokkos_for.cpp b/examples/kokkos_for.cpp index 7a1579be..9effb331 100644 --- a/examples/kokkos_for.cpp +++ b/examples/kokkos_for.cpp @@ -1,234 +1,230 @@ - +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include using namespace mtr; // matar namespace - // main -int main(){ - - - Kokkos::initialize(); +int main() { - + Kokkos::initialize(); + { printf("starting test of loop macros \n"); - - //Kokkos::View arr("ARR", 10); - CArrayKokkos arr(10); - FOR_ALL (i, 0, 10, { + + // Kokkos::View arr("ARR", 10); + CArrayKokkos arr(10); + FOR_ALL(i, 0, 10, { arr(i) = 314; }); - //Kokkos::View arr_2D("ARR_2D", 10,10); - CArrayKokkos arr_2D(10,10); - FOR_ALL (i, 0, 10, - j, 0, 10,{ - arr_2D(i,j) = 314; + // Kokkos::View arr_2D("ARR_2D", 10,10); + CArrayKokkos arr_2D(10, 10); + FOR_ALL(i, 0, 10, + j, 0, 10, { + arr_2D(i, j) = 314; }); - //Kokkos::View arr_3D("ARR_3D", 10,10,10); - CArrayKokkos arr_3D(10,10,10); - FOR_ALL (i, 0, 10, - j, 0, 10, - k, 0, 10,{ - arr_3D(i,j,k) = 314; + // Kokkos::View arr_3D("ARR_3D", 10,10,10); + CArrayKokkos arr_3D(10, 10, 10); + FOR_ALL(i, 0, 10, + j, 0, 10, + k, 0, 10, { + arr_3D(i, j, k) = 314; }); - int loc_sum = 0; - int result = 0; + int result = 0; REDUCE_SUM(i, 0, 10, - loc_sum, { - loc_sum += arr(i)*arr(i); + loc_sum, { + loc_sum += arr(i) * arr(i); }, result); printf("1D reduce sum: %i vs. 985960\n", result); - - - - loc_sum = 0; - result = 0; + result = 0; REDUCE_SUM(i, 0, 10, j, 0, 10, - loc_sum, { - loc_sum += arr_2D(i,j)*arr_2D(i,j); - }, result); - + loc_sum, { + loc_sum += arr_2D(i, j) * arr_2D(i, j); + }, result); printf("2D reduce sum: %i vs. 9859600\n", result); - - + loc_sum = 0; - result = 0; + result = 0; REDUCE_SUM(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_sum, { - loc_sum += arr_3D(i,j,k)*arr_3D(i,j,k); - }, result); - + j, 0, 10, + k, 0, 10, + loc_sum, { + loc_sum += arr_3D(i, j, k) * arr_3D(i, j, k); + }, result); printf("3D reduce: %i vs. 98596000\n", result); - result = 0; int loc_max = 2000; REDUCE_MAX(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_max, { - - if(loc_max < arr_3D(i,j,k)){ - loc_max = arr_3D(i,j,k); - } - - }, - result); - + j, 0, 10, + k, 0, 10, + loc_max, { + if (loc_max < arr_3D(i, j, k)) { + loc_max = arr_3D(i, j, k); + } + },result); + printf("3D reduce MAX %i\n", result); - // verbose version int loc_max_value = 20000; - int max_value = 20000; + int max_value = 20000; Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({0,0}, {10,10}), - KOKKOS_LAMBDA(const int i, const int j, int& loc_max_value) - { - if(arr_2D(i,j) > loc_max_value){ - loc_max_value = arr_2D(i,j); - } - }, - Kokkos::Max(max_value) - ); + Kokkos::MDRangePolicy>({ 0, 0 }, { 10, 10 }), + KOKKOS_LAMBDA(const int i, const int j, int& loc_max_value) + { + if (arr_2D(i, j) > loc_max_value) { + loc_max_value = arr_2D(i, j); + } + },Kokkos::Max(max_value)); printf("2D reduce MAX kokkos verbose : %i\n", max_value); - result = 0; int loc_min = 2000; REDUCE_MIN(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_min, { - - if(loc_min > arr_3D(i,j,k)){ - loc_min = arr_3D(i,j,k); - } - - }, - result); - - printf("3D reduce MIN %i\n", result); + j, 0, 10, + k, 0, 10, + loc_min, { + if (loc_min > arr_3D(i, j, k)) { + loc_min = arr_3D(i, j, k); + } + },result); - - + printf("3D reduce MIN %i\n", result); - // DO ALL - FMatrixKokkos matrix1D(10); + FMatrixKokkos matrix1D(10); // Initialize matrix2D - DO_ALL (i, 1, 10, { - matrix1D(i) = 1; + DO_ALL(i, 1, 10, { + matrix1D(i) = 1; }); // end parallel do - - FMatrixKokkos matrix2D(10,10); + FMatrixKokkos matrix2D(10, 10); // Initialize matrix2D - DO_ALL (j, 1, 10, - i, 1, 10, { - matrix2D(i,j) = 1; + DO_ALL(j, 1, 10, + i, 1, 10, { + matrix2D(i, j) = 1; }); // end parallel do - FMatrixKokkos matrix3D(10,10,10); + FMatrixKokkos matrix3D(10, 10, 10); // Initialize matrix3D - DO_ALL (k, 1, 10, - j, 1, 10, - i, 1, 10, { - matrix3D(i,j,k) = 1; + DO_ALL(k, 1, 10, + j, 1, 10, + i, 1, 10, { + matrix3D(i, j, k) = 1; }); // end parallel do - // Initialize matrix2D - DO_ALL (i, 1, 1, { - matrix1D(1) = 10; - matrix2D(1,1) = 20; - matrix3D(1,1,1) = 30; - - matrix1D(10) = -10; - matrix2D(10,10) = -20; - matrix3D(10,10,10) = -30; + DO_ALL(i, 1, 1, { + matrix1D(1) = 10; + matrix2D(1, 1) = 20; + matrix3D(1, 1, 1) = 30; + + matrix1D(10) = -10; + matrix2D(10, 10) = -20; + matrix3D(10, 10, 10) = -30; }); // end parallel do - DO_REDUCE_MAX(i, 1, 10, - loc_max, { - if(loc_max < matrix1D(i)){ - loc_max = matrix1D(i); - } - }, result); - - printf("result max 1D matrix = %i\n", result); - + loc_max, { + if (loc_max < matrix1D(i)) { + loc_max = matrix1D(i); + } + }, result); + printf("result max 1D matrix = %i\n", result); DO_REDUCE_MAX(j, 1, 10, - i, 1, 10, - loc_max, { - if(loc_max < matrix2D(i,j)){ - loc_max = matrix2D(i,j); - } - }, result); + i, 1, 10, + loc_max, { + if (loc_max < matrix2D(i, j)) { + loc_max = matrix2D(i, j); + } + }, result); printf("result max 2D matrix = %i\n", result); - DO_REDUCE_MAX(k, 1, 10, - j, 1, 10, - i, 1, 10, - loc_max, { - if(loc_max < matrix3D(i,j,k)){ - loc_max = matrix3D(i,j,k); - } - }, result); + j, 1, 10, + i, 1, 10, + loc_max, { + if (loc_max < matrix3D(i, j, k)) { + loc_max = matrix3D(i, j, k); + } + }, result); printf("result max 3D matrix = %i\n", result); - DO_REDUCE_MIN(i, 1, 10, - loc_min, { - if(loc_min > matrix1D(i)){ - loc_min = matrix1D(i); - } - }, result); + loc_min, { + if (loc_min > matrix1D(i)) { + loc_min = matrix1D(i); + } + }, result); printf("result min 1D matrix = %i\n", result); - DO_REDUCE_MIN(j, 1, 10, - i, 1, 10, - loc_min, { - if(loc_min > matrix2D(i,j)){ - loc_min = matrix2D(i,j); - } - }, result); + i, 1, 10, + loc_min, { + if (loc_min > matrix2D(i, j)) { + loc_min = matrix2D(i, j); + } + }, result); printf("result min 2D matrix = %i\n", result); - DO_REDUCE_MIN(k, 1, 10, - j, 1, 10, - i, 1, 10, - loc_min, { - if(loc_min > matrix3D(i,j,k)){ - loc_min = matrix3D(i,j,k); - } - }, result); - - printf("result min 3D matrix = %i\n", result); + j, 1, 10, + i, 1, 10, + loc_min, { + if (loc_min > matrix3D(i, j, k)) { + loc_min = matrix3D(i, j, k); + } + }, result); + printf("result min 3D matrix = %i\n", result); // testing serial FOR and DO loop macros. These // serial loops work on the host or the device. @@ -236,13 +232,13 @@ int main(){ // give the user a simple syntax to replace // the for(...){} syntax - CArray host_array1D(5); - CArray host_array2D(5,5); - CArray host_array3D(2,2,2); + CArray host_array1D(5); + CArray host_array2D(5, 5); + CArray host_array3D(2, 2, 2); - FMatrix host_matrix1D(3); - FMatrix host_matrix2D(3,3); - FMatrix host_matrix3D(3,3,3); + FMatrix host_matrix1D(3); + FMatrix host_matrix2D(3, 3); + FMatrix host_matrix3D(3, 3, 3); FOR_LOOP(i, 0, 5, { host_array1D(i) = i; @@ -253,28 +249,31 @@ int main(){ }); FOR_LOOP(i, 0, 5, - j, 0, 5, { - host_array2D(i,j) = i*j; + j, 0, 5, { + host_array2D(i, j) = i * j; }); printf("value in host array2D = \n"); FOR_LOOP(i, 0, 5, - j, 0, 5, { - printf(" %d ", host_array2D(i,j)); - if(j==4) printf("\n"); + j, 0, 5, { + printf(" %d ", host_array2D(i, j)); + if (j == 4) { + printf("\n"); + } }); - FOR_LOOP(i, 0, 2, - j, 0, 2, - k, 0, 2, { - host_array3D(i,j,k) = i*j*k; + j, 0, 2, + k, 0, 2, { + host_array3D(i, j, k) = i * j * k; }); printf("value in host array3D = \n"); FOR_LOOP(i, 0, 2, - j, 0, 2, - k, 0, 2, { - printf(" %d ", host_array3D(i,j,k)); - if(k==1) printf("\n"); + j, 0, 2, + k, 0, 2, { + printf(" %d ", host_array3D(i, j, k)); + if (k == 1) { + printf("\n"); + } }); DO_LOOP(i, 1, 3, { @@ -286,72 +285,72 @@ int main(){ }); DO_LOOP(j, 1, 3, - i, 1, 3, { - host_matrix2D(i,j) = i*j; + i, 1, 3, { + host_matrix2D(i, j) = i * j; }); printf("value in host matrix2D = \n"); DO_LOOP(j, 1, 3, - i, 1, 3, { - printf(" %d ", host_matrix2D(i,j)); - if(i==3) printf("\n"); + i, 1, 3, { + printf(" %d ", host_matrix2D(i, j)); + if (i == 3) { + printf("\n"); + } }); DO_LOOP(k, 1, 3, - j, 1, 3, - i, 1, 3, { - host_matrix3D(i,j,k) = i*j*k; + j, 1, 3, + i, 1, 3, { + host_matrix3D(i, j, k) = i * j * k; }); printf("value in host matrix3D = \n"); DO_LOOP(k, 1, 3, - j, 1, 3, - i, 1, 3, { - printf(" %d ", host_matrix3D(i,j,k)); - if(i==3) printf("\n"); - if(j==3 && i==3 ) printf("--\n"); + j, 1, 3, + i, 1, 3, { + printf(" %d ", host_matrix3D(i, j, k)); + if (i == 3) { + printf("\n"); + } + if (j == 3 && i == 3) { + printf("--\n"); + } }); - printf("testing for loop increments of 2 = \n"); FOR_LOOP(i, 0, 6, 2, { printf(" %d \n", i); }); printf("-- \n"); FOR_LOOP(i, 0, 6, 2, - j, 0, 6, 2, { + j, 0, 6, 2, { printf(" %d %d \n", i, j); }); printf("-- \n"); FOR_LOOP(i, 0, 6, 2, - j, 0, 6, 2, - k, 0, 6, 2, { + j, 0, 6, 2, + k, 0, 6, 2, { printf(" %d %d %d \n", i, j, k); }); - printf("testing do loop increments of 2 = \n"); DO_LOOP(i, 1, 6, 2, { printf(" %d \n", i); }); printf("-- \n"); DO_LOOP(i, 1, 6, 2, - j, 1, 6, 2, { + j, 1, 6, 2, { printf(" %d %d \n", i, j); }); printf("-- \n"); DO_LOOP(i, 1, 6, 2, - j, 1, 6, 2, - k, 1, 6, 2, { + j, 1, 6, 2, + k, 1, 6, 2, { printf(" %d %d %d \n", i, j, k); }); printf("done\n"); - -} + } Kokkos::finalize(); - return 0; } - - diff --git a/examples/laplace/main_carray_right.cpp b/examples/laplace/main_carray_right.cpp index d00406a3..c98df0d0 100644 --- a/examples/laplace/main_carray_right.cpp +++ b/examples/laplace/main_carray_right.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,20 +38,21 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(CArray &temperature_previous); -void track_progress(int iteration, CArray &temperature); +void initialize(CArray& temperature_previous); +void track_progress(int iteration, CArray& temperature); -int main() { - int i, j; - int iteration = 1; - double worst_dt = 100; +int main() +{ + int i, j; + int iteration = 1; + double worst_dt = 100; - auto temperature = CArray (height+2, width+2); - auto temperature_previous = CArray (height+2, width+2); + auto temperature = CArray(height + 2, width + 2); + auto temperature_previous = CArray(height + 2, width + 2); // Start measuring time auto begin = std::chrono::high_resolution_clock::now(); @@ -30,19 +64,19 @@ int main() { // finite difference for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); } } - + // calculate max difference between temperature and temperature_previous worst_dt = 0.0; for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - worst_dt = fmax(fabs(temperature(i,j) - - temperature_previous(i,j)), + worst_dt = fmax(fabs(temperature(i, j) - + temperature_previous(i, j)), worst_dt); } } @@ -50,7 +84,7 @@ int main() { // update temperature_previous for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - temperature_previous(i,j) = temperature(i,j); + temperature_previous(i, j) = temperature(i, j); } } @@ -63,44 +97,46 @@ int main() { } // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast(end - begin); printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); return 0; } -void initialize(CArray &temperature_previous) { +void initialize(CArray& temperature_previous) +{ int i, j; // initialize temperature_previous to 0.0 - for (i = 0; i <= height+1; i++) { - for (j = 0; j <= width+1; j++) { - temperature_previous(i,j) = 0.0; + for (i = 0; i <= height + 1; i++) { + for (j = 0; j <= width + 1; j++) { + temperature_previous(i, j) = 0.0; } } // setting the left and right boundary conditions - for (i = 0; i <= height+1; i++) { - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + for (i = 0; i <= height + 1; i++) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; } // setting the top and bottom boundary condition - for (j = 0; j <= width+1; j++) { - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; + for (j = 0; j <= width + 1; j++) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; } } -void track_progress(int iteration, CArray &temperature) { +void track_progress(int iteration, CArray& temperature) +{ int i; printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_carray_wrong.cpp b/examples/laplace/main_carray_wrong.cpp index dea9254e..c32ec393 100644 --- a/examples/laplace/main_carray_wrong.cpp +++ b/examples/laplace/main_carray_wrong.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,20 +38,21 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(CArray &temperature_previous); -void track_progress(int iteration, CArray &temperature); +void initialize(CArray& temperature_previous); +void track_progress(int iteration, CArray& temperature); -int main() { - int i, j; - int iteration = 1; - double worst_dt = 100; +int main() +{ + int i, j; + int iteration = 1; + double worst_dt = 100; - auto temperature = CArray (height+2, width+2); - auto temperature_previous = CArray (height+2, width+2); + auto temperature = CArray(height + 2, width + 2); + auto temperature_previous = CArray(height + 2, width + 2); // Start measuring time auto begin = std::chrono::high_resolution_clock::now(); @@ -30,19 +64,19 @@ int main() { // finite difference for (j = 1; j <= width; j++) { for (i = 1; i <= height; i++) { - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); } } - + // calculate max difference between temperature and temperature_previous worst_dt = 0.0; for (j = 1; j <= width; j++) { for (i = 1; i <= height; i++) { - worst_dt = fmax(fabs(temperature(i,j) - - temperature_previous(i,j)), + worst_dt = fmax(fabs(temperature(i, j) - + temperature_previous(i, j)), worst_dt); } } @@ -50,7 +84,7 @@ int main() { // update temperature_previous for (j = 1; j <= width; j++) { for (i = 1; i <= height; i++) { - temperature_previous(i,j) = temperature(i,j); + temperature_previous(i, j) = temperature(i, j); } } @@ -63,44 +97,46 @@ int main() { } // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast(end - begin); printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); return 0; } -void initialize(CArray &temperature_previous) { +void initialize(CArray& temperature_previous) +{ int i, j; // initialize temperature_previous to 0.0 - for (j = 0; j <= width+1; j++) { - for (i = 0; i <= height+1; i++) { - temperature_previous(i,j) = 0.0; + for (j = 0; j <= width + 1; j++) { + for (i = 0; i <= height + 1; i++) { + temperature_previous(i, j) = 0.0; } } // setting the left and right boundary conditions - for (i = 0; i <= height+1; i++) { - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + for (i = 0; i <= height + 1; i++) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; } // setting the top and bottom boundary condition - for (j = 0; j <= width+1; j++) { - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; + for (j = 0; j <= width + 1; j++) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; } } -void track_progress(int iteration, CArray &temperature) { +void track_progress(int iteration, CArray& temperature) +{ int i; printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_carraykokkos_c_indexing.cpp b/examples/laplace/main_carraykokkos_c_indexing.cpp index 956b3079..beb9b9d3 100644 --- a/examples/laplace/main_carraykokkos_c_indexing.cpp +++ b/examples/laplace/main_carraykokkos_c_indexing.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,116 +38,116 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(CArrayKokkos &temperature_previous); -void track_progress(int iteration, CArrayKokkos &temperature); +void initialize(CArrayKokkos& temperature_previous); +void track_progress(int iteration, CArrayKokkos& temperature); -int main(int argc, char* argv[]) { +int main(int argc, char* argv[]) +{ Kokkos::initialize(argc, argv); { - - auto temperature = CArrayKokkos(height+2, width+2); - auto temperature_previous = CArrayKokkos(height+2, width+2); - - int iteration = 1; - double worst_dt = 100; - double max_value; - - // Start measuring time - auto begin = std::chrono::high_resolution_clock::now(); - - // initialize temperature profile - initialize(temperature_previous); - - while (worst_dt > temp_tolerance) { - // finite difference - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); - }); - - // calculate max difference between temperature and temperature_previous - Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value){ - double value = fabs(temperature(i,j) - temperature_previous(i,j)); - if(value > loc_max_value) loc_max_value = value; - }, - Kokkos::Max(max_value) - ); - worst_dt = max_value; - - // update temperature_previous - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = temperature(i,j); - }); - - // track progress - if (iteration % 100 == 0) { - track_progress(iteration, temperature); + auto temperature = CArrayKokkos(height + 2, width + 2); + auto temperature_previous = CArrayKokkos(height + 2, width + 2); + + int iteration = 1; + double worst_dt = 100; + double max_value; + + // Start measuring time + auto begin = std::chrono::high_resolution_clock::now(); + + // initialize temperature profile + initialize(temperature_previous); + + while (worst_dt > temp_tolerance) { + // finite difference + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); + }); + + // calculate max difference between temperature and temperature_previous + Kokkos::parallel_reduce( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value) { + double value = fabs(temperature(i, j) - temperature_previous(i, j)); + if (value > loc_max_value) { + loc_max_value = value; + } + },Kokkos::Max(max_value)); + worst_dt = max_value; + + // update temperature_previous + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = temperature(i, j); + }); + + // track progress + if (iteration % 100 == 0) { + track_progress(iteration, temperature); + } + + iteration++; } - iteration++; - } - - // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end - begin); - - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + // Stop measuring time and calculate the elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); } Kokkos::finalize(); return 0; } -void initialize(CArrayKokkos &temperature_previous) { +void initialize(CArrayKokkos& temperature_previous) +{ // initialize temperature_previous to 0.0 Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0}, {height+2,width+2}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = 0.0; + Kokkos::MDRangePolicy>({ 0, 0 }, { height + 2, width + 2 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = 0.0; }); // setting the left and right boundary conditions Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,height+2), - KOKKOS_LAMBDA(const int i){ - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + Kokkos::RangePolicy<>(0, height + 2), + KOKKOS_LAMBDA(const int i) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; }); // setting the top and bottom boundary condition Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,width+2), - KOKKOS_LAMBDA(const int j){ - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; - + Kokkos::RangePolicy<>(0, width + 2), + KOKKOS_LAMBDA(const int j) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; }); } -void track_progress(int iteration, CArrayKokkos &temperature) { +void track_progress(int iteration, CArrayKokkos& temperature) +{ int i; // make a deep copy of temperature from device to host auto temperature_host = create_mirror_view_and_copy(Kokkos::HostSpace(), temperature.get_kokkos_view()); - auto temperature_host_view = ViewCArray (temperature_host.data(), height+2, width+2); + auto temperature_host_view = ViewCArray(temperature_host.data(), height + 2, width + 2); printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature_host_view(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature_host_view(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_carraykokkos_default_indexing.cpp b/examples/laplace/main_carraykokkos_default_indexing.cpp index 4db4841e..ae55a7e7 100644 --- a/examples/laplace/main_carraykokkos_default_indexing.cpp +++ b/examples/laplace/main_carraykokkos_default_indexing.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,116 +38,116 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(CArrayKokkos &temperature_previous); -void track_progress(int iteration, CArrayKokkos &temperature); +void initialize(CArrayKokkos& temperature_previous); +void track_progress(int iteration, CArrayKokkos& temperature); -int main(int argc, char* argv[]) { +int main(int argc, char* argv[]) +{ Kokkos::initialize(argc, argv); { - - auto temperature = CArrayKokkos(height+2, width+2); - auto temperature_previous = CArrayKokkos(height+2, width+2); - - int iteration = 1; - double worst_dt = 100; - double max_value; - - // Start measuring time - auto begin = std::chrono::high_resolution_clock::now(); - - // initialize temperature profile - initialize(temperature_previous); - - while (worst_dt > temp_tolerance) { - // finite difference - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); - }); - - // calculate max difference between temperature and temperature_previous - Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value){ - double value = fabs(temperature(i,j) - temperature_previous(i,j)); - if(value > loc_max_value) loc_max_value = value; - }, - Kokkos::Max(max_value) - ); - worst_dt = max_value; - - // update temperature_previous - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = temperature(i,j); - }); - - // track progress - if (iteration % 100 == 0) { - track_progress(iteration, temperature); + auto temperature = CArrayKokkos(height + 2, width + 2); + auto temperature_previous = CArrayKokkos(height + 2, width + 2); + + int iteration = 1; + double worst_dt = 100; + double max_value; + + // Start measuring time + auto begin = std::chrono::high_resolution_clock::now(); + + // initialize temperature profile + initialize(temperature_previous); + + while (worst_dt > temp_tolerance) { + // finite difference + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); + }); + + // calculate max difference between temperature and temperature_previous + Kokkos::parallel_reduce( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value) { + double value = fabs(temperature(i, j) - temperature_previous(i, j)); + if (value > loc_max_value) { + loc_max_value = value; + } + }, Kokkos::Max(max_value)); + worst_dt = max_value; + + // update temperature_previous + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = temperature(i, j); + }); + + // track progress + if (iteration % 100 == 0) { + track_progress(iteration, temperature); + } + + iteration++; } - iteration++; - } - - // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end - begin); - - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + // Stop measuring time and calculate the elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); } Kokkos::finalize(); return 0; } -void initialize(CArrayKokkos &temperature_previous) { +void initialize(CArrayKokkos& temperature_previous) +{ // initialize temperature_previous to 0.0 Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0}, {height+2,width+2}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = 0.0; + Kokkos::MDRangePolicy>({ 0, 0 }, { height + 2, width + 2 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = 0.0; }); // setting the left and right boundary conditions Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,height+2), - KOKKOS_LAMBDA(const int i){ - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + Kokkos::RangePolicy<>(0, height + 2), + KOKKOS_LAMBDA(const int i) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; }); // setting the top and bottom boundary condition Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,width+2), - KOKKOS_LAMBDA(const int j){ - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; - + Kokkos::RangePolicy<>(0, width + 2), + KOKKOS_LAMBDA(const int j) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; }); } -void track_progress(int iteration, CArrayKokkos &temperature) { +void track_progress(int iteration, CArrayKokkos& temperature) +{ int i; // make a deep copy of temperature from device to host auto temperature_host = create_mirror_view_and_copy(Kokkos::HostSpace(), temperature.get_kokkos_view()); - auto temperature_host_view = ViewCArray (temperature_host.data(), height+2, width+2); + auto temperature_host_view = ViewCArray(temperature_host.data(), height + 2, width + 2); printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature_host_view(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature_host_view(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_cpp.cpp b/examples/laplace/main_cpp.cpp index f5bf81b6..d6eef01c 100644 --- a/examples/laplace/main_cpp.cpp +++ b/examples/laplace/main_cpp.cpp @@ -1,23 +1,57 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -double temperature[height+2][width+2]; -double temperature_previous[height+2][width+2]; +double temperature[height + 2][width + 2]; +double temperature_previous[height + 2][width + 2]; void initialize(); void track_progress(int iter); -int main() { - int i, j; - int iteration = 1; - double worst_dt = 100; +int main() +{ + int i, j; + int iteration = 1; + double worst_dt = 100; // Start measuring time auto begin = std::chrono::high_resolution_clock::now(); @@ -29,19 +63,19 @@ int main() { // finite difference for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - temperature[i][j] = 0.25 * (temperature_previous[i+1][j] - + temperature_previous[i-1][j] - + temperature_previous[i][j+1] - + temperature_previous[i][j-1]); + temperature[i][j] = 0.25 * (temperature_previous[i + 1][j] + + temperature_previous[i - 1][j] + + temperature_previous[i][j + 1] + + temperature_previous[i][j - 1]); } } - + // calculate max difference between temperature and temperature_previous worst_dt = 0.0; for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - worst_dt = fmax(fabs(temperature[i][j] - - temperature_previous[i][j]), + worst_dt = fmax(fabs(temperature[i][j] - + temperature_previous[i][j]), worst_dt); } } @@ -62,46 +96,46 @@ int main() { } // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast(end - begin); printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); return 0; - } -void initialize() { +void initialize() +{ int i, j; // initialize temperature_previous to 0.0 - for (i = 0; i <= height+1; i++) { - for (j = 0; j <= width+1; j++) { - temperature_previous[i][j] = 0.0; + for (i = 0; i <= height + 1; i++) { + for (j = 0; j <= width + 1; j++) { + temperature_previous[i][j] = 0.0; } } // setting the left and right boundary conditions - for (i = 0; i <= height+1; i++) { + for (i = 0; i <= height + 1; i++) { temperature_previous[i][0] = 0.0; - temperature_previous[i][width+1] = (100.0/height)*i; + temperature_previous[i][width + 1] = (100.0 / height) * i; } // setting the top and bottom boundary condition - for (j = 0; j <= width+1; j++) { + for (j = 0; j <= width + 1; j++) { temperature_previous[0][j] = 0.0; - temperature_previous[height+1][j] = (100.0/width)*j; + temperature_previous[height + 1][j] = (100.0 / width) * j; } - } -void track_progress(int iter) { +void track_progress(int iter) +{ int i; printf("---------- Iteration number: %d ----------\n", iter); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature[i][i]); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature[i][i]); } printf("\n"); } diff --git a/examples/laplace/main_farray_right.cpp b/examples/laplace/main_farray_right.cpp index 3f6897fa..abc202f1 100644 --- a/examples/laplace/main_farray_right.cpp +++ b/examples/laplace/main_farray_right.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,20 +38,21 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(FArray &temperature_previous); -void track_progress(int iteration, FArray &temperature); +void initialize(FArray& temperature_previous); +void track_progress(int iteration, FArray& temperature); -int main() { - int i, j; - int iteration = 1; - double worst_dt = 100; +int main() +{ + int i, j; + int iteration = 1; + double worst_dt = 100; - auto temperature = FArray (height+2, width+2); - auto temperature_previous = FArray (height+2, width+2); + auto temperature = FArray(height + 2, width + 2); + auto temperature_previous = FArray(height + 2, width + 2); // Start measuring time auto begin = std::chrono::high_resolution_clock::now(); @@ -30,19 +64,19 @@ int main() { // finite difference for (j = 1; j <= width; j++) { for (i = 1; i <= height; i++) { - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); } } - + // calculate max difference between temperature and temperature_previous worst_dt = 0.0; for (j = 1; j <= width; j++) { for (i = 1; i <= height; i++) { - worst_dt = fmax(fabs(temperature(i,j) - - temperature_previous(i,j)), + worst_dt = fmax(fabs(temperature(i, j) - + temperature_previous(i, j)), worst_dt); } } @@ -50,7 +84,7 @@ int main() { // update temperature_previous for (j = 1; j <= width; j++) { for (i = 1; i <= height; i++) { - temperature_previous(i,j) = temperature(i,j); + temperature_previous(i, j) = temperature(i, j); } } @@ -63,44 +97,46 @@ int main() { } // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast(end - begin); printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); return 0; } -void initialize(FArray &temperature_previous) { +void initialize(FArray& temperature_previous) +{ int i, j; // initialize temperature_previous to 0.0 - for (j = 0; j <= width+1; j++) { - for (i = 0; i <= height+1; i++) { - temperature_previous(i,j) = 0.0; + for (j = 0; j <= width + 1; j++) { + for (i = 0; i <= height + 1; i++) { + temperature_previous(i, j) = 0.0; } } // setting the left and right boundary conditions - for (i = 0; i <= height+1; i++) { - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + for (i = 0; i <= height + 1; i++) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; } // setting the top and bottom boundary condition - for (j = 0; j <= width+1; j++) { - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; + for (j = 0; j <= width + 1; j++) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; } } -void track_progress(int iteration, FArray &temperature) { +void track_progress(int iteration, FArray& temperature) +{ int i; printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_farray_wrong.cpp b/examples/laplace/main_farray_wrong.cpp index 7e639b21..0747bcf5 100644 --- a/examples/laplace/main_farray_wrong.cpp +++ b/examples/laplace/main_farray_wrong.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,20 +38,21 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(FArray &temperature_previous); -void track_progress(int iteration, FArray &temperature); +void initialize(FArray& temperature_previous); +void track_progress(int iteration, FArray& temperature); -int main() { - int i, j; - int iteration = 1; - double worst_dt = 100; +int main() +{ + int i, j; + int iteration = 1; + double worst_dt = 100; - auto temperature = FArray (height+2, width+2); - auto temperature_previous = FArray (height+2, width+2); + auto temperature = FArray(height + 2, width + 2); + auto temperature_previous = FArray(height + 2, width + 2); // Start measuring time auto begin = std::chrono::high_resolution_clock::now(); @@ -30,19 +64,19 @@ int main() { // finite difference for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); } } - + // calculate max difference between temperature and temperature_previous worst_dt = 0.0; for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - worst_dt = fmax(fabs(temperature(i,j) - - temperature_previous(i,j)), + worst_dt = fmax(fabs(temperature(i, j) - + temperature_previous(i, j)), worst_dt); } } @@ -50,7 +84,7 @@ int main() { // update temperature_previous for (i = 1; i <= height; i++) { for (j = 1; j <= width; j++) { - temperature_previous(i,j) = temperature(i,j); + temperature_previous(i, j) = temperature(i, j); } } @@ -63,44 +97,46 @@ int main() { } // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast(end - begin); printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); return 0; } -void initialize(FArray &temperature_previous) { +void initialize(FArray& temperature_previous) +{ int i, j; // initialize temperature_previous to 0.0 - for (i = 0; i <= height+1; i++) { - for (j = 0; j <= width+1; j++) { - temperature_previous(i,j) = 0.0; + for (i = 0; i <= height + 1; i++) { + for (j = 0; j <= width + 1; j++) { + temperature_previous(i, j) = 0.0; } } // setting the left and right boundary conditions - for (i = 0; i <= height+1; i++) { - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + for (i = 0; i <= height + 1; i++) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; } // setting the top and bottom boundary condition - for (j = 0; j <= width+1; j++) { - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; + for (j = 0; j <= width + 1; j++) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; } } -void track_progress(int iteration, FArray &temperature) { +void track_progress(int iteration, FArray& temperature) +{ int i; printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_farraykokkos_default_indexing.cpp b/examples/laplace/main_farraykokkos_default_indexing.cpp index f36076fa..e558e6ec 100644 --- a/examples/laplace/main_farraykokkos_default_indexing.cpp +++ b/examples/laplace/main_farraykokkos_default_indexing.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,116 +38,116 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(FArrayKokkos &temperature_previous); -void track_progress(int iteration, FArrayKokkos &temperature); +void initialize(FArrayKokkos& temperature_previous); +void track_progress(int iteration, FArrayKokkos& temperature); -int main(int argc, char* argv[]) { +int main(int argc, char* argv[]) +{ Kokkos::initialize(argc, argv); { - - auto temperature = FArrayKokkos(height+2, width+2); - auto temperature_previous = FArrayKokkos(height+2, width+2); - - int iteration = 1; - double worst_dt = 100; - double max_value; - - // Start measuring time - auto begin = std::chrono::high_resolution_clock::now(); - - // initialize temperature profile - initialize(temperature_previous); - - while (worst_dt > temp_tolerance) { - // finite difference - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); - }); - - // calculate max difference between temperature and temperature_previous - Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value){ - double value = fabs(temperature(i,j) - temperature_previous(i,j)); - if(value > loc_max_value) loc_max_value = value; - }, - Kokkos::Max(max_value) - ); - worst_dt = max_value; - - // update temperature_previous - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = temperature(i,j); - }); - - // track progress - if (iteration % 100 == 0) { - track_progress(iteration, temperature); + auto temperature = FArrayKokkos(height + 2, width + 2); + auto temperature_previous = FArrayKokkos(height + 2, width + 2); + + int iteration = 1; + double worst_dt = 100; + double max_value; + + // Start measuring time + auto begin = std::chrono::high_resolution_clock::now(); + + // initialize temperature profile + initialize(temperature_previous); + + while (worst_dt > temp_tolerance) { + // finite difference + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); + }); + + // calculate max difference between temperature and temperature_previous + Kokkos::parallel_reduce( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value) { + double value = fabs(temperature(i, j) - temperature_previous(i, j)); + if (value > loc_max_value) { + loc_max_value = value; + } + }, Kokkos::Max(max_value)); + worst_dt = max_value; + + // update temperature_previous + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = temperature(i, j); + }); + + // track progress + if (iteration % 100 == 0) { + track_progress(iteration, temperature); + } + + iteration++; } - iteration++; - } - - // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end - begin); - - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + // Stop measuring time and calculate the elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); } Kokkos::finalize(); return 0; } -void initialize(FArrayKokkos &temperature_previous) { +void initialize(FArrayKokkos& temperature_previous) +{ // initialize temperature_previous to 0.0 Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0}, {height+2,width+2}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = 0.0; + Kokkos::MDRangePolicy>({ 0, 0 }, { height + 2, width + 2 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = 0.0; }); // setting the left and right boundary conditions Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,height+2), - KOKKOS_LAMBDA(const int i){ - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + Kokkos::RangePolicy<>(0, height + 2), + KOKKOS_LAMBDA(const int i) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; }); // setting the top and bottom boundary condition Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,width+2), - KOKKOS_LAMBDA(const int j){ - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; - + Kokkos::RangePolicy<>(0, width + 2), + KOKKOS_LAMBDA(const int j) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; }); } -void track_progress(int iteration, FArrayKokkos &temperature) { +void track_progress(int iteration, FArrayKokkos& temperature) +{ int i; // make a deep copy of temperature from device to host auto temperature_host = create_mirror_view_and_copy(Kokkos::HostSpace(), temperature.get_kokkos_view()); - auto temperature_host_view = ViewFArray (temperature_host.data(), height+2, width+2); + auto temperature_host_view = ViewFArray(temperature_host.data(), height + 2, width + 2); printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature_host_view(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature_host_view(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_farraykokkos_f_indexing.cpp b/examples/laplace/main_farraykokkos_f_indexing.cpp index d057d355..1470b6dd 100644 --- a/examples/laplace/main_farraykokkos_f_indexing.cpp +++ b/examples/laplace/main_farraykokkos_f_indexing.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,116 +38,116 @@ using namespace mtr; // matar namespace -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -void initialize(FArrayKokkos &temperature_previous); -void track_progress(int iteration, FArrayKokkos &temperature); +void initialize(FArrayKokkos& temperature_previous); +void track_progress(int iteration, FArrayKokkos& temperature); -int main(int argc, char* argv[]) { +int main(int argc, char* argv[]) +{ Kokkos::initialize(argc, argv); { - - auto temperature = FArrayKokkos(height+2, width+2); - auto temperature_previous = FArrayKokkos(height+2, width+2); - - int iteration = 1; - double worst_dt = 100; - double max_value; - - // Start measuring time - auto begin = std::chrono::high_resolution_clock::now(); - - // initialize temperature profile - initialize(temperature_previous); - - while (worst_dt > temp_tolerance) { - // finite difference - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); - }); - - // calculate max difference between temperature and temperature_previous - Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value){ - double value = fabs(temperature(i,j) - temperature_previous(i,j)); - if(value > loc_max_value) loc_max_value = value; - }, - Kokkos::Max(max_value) - ); - worst_dt = max_value; - - // update temperature_previous - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = temperature(i,j); - }); - - // track progress - if (iteration % 100 == 0) { - track_progress(iteration, temperature); + auto temperature = FArrayKokkos(height + 2, width + 2); + auto temperature_previous = FArrayKokkos(height + 2, width + 2); + + int iteration = 1; + double worst_dt = 100; + double max_value; + + // Start measuring time + auto begin = std::chrono::high_resolution_clock::now(); + + // initialize temperature profile + initialize(temperature_previous); + + while (worst_dt > temp_tolerance) { + // finite difference + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); + }); + + // calculate max difference between temperature and temperature_previous + Kokkos::parallel_reduce( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value) { + double value = fabs(temperature(i, j) - temperature_previous(i, j)); + if (value > loc_max_value) { + loc_max_value = value; + } + }, Kokkos::Max(max_value)); + worst_dt = max_value; + + // update temperature_previous + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = temperature(i, j); + }); + + // track progress + if (iteration % 100 == 0) { + track_progress(iteration, temperature); + } + + iteration++; } - iteration++; - } - - // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end - begin); - - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); + // Stop measuring time and calculate the elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); } Kokkos::finalize(); return 0; } -void initialize(FArrayKokkos &temperature_previous) { +void initialize(FArrayKokkos& temperature_previous) +{ // initialize temperature_previous to 0.0 Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0}, {height+2,width+2}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = 0.0; + Kokkos::MDRangePolicy>({ 0, 0 }, { height + 2, width + 2 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = 0.0; }); // setting the left and right boundary conditions Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,height+2), - KOKKOS_LAMBDA(const int i){ - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; + Kokkos::RangePolicy<>(0, height + 2), + KOKKOS_LAMBDA(const int i) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; }); // setting the top and bottom boundary condition Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,width+2), - KOKKOS_LAMBDA(const int j){ - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; - + Kokkos::RangePolicy<>(0, width + 2), + KOKKOS_LAMBDA(const int j) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; }); } -void track_progress(int iteration, FArrayKokkos &temperature) { +void track_progress(int iteration, FArrayKokkos& temperature) +{ int i; // make a deep copy of temperature from device to host auto temperature_host = create_mirror_view_and_copy(Kokkos::HostSpace(), temperature.get_kokkos_view()); - auto temperature_host_view = ViewFArray (temperature_host.data(), height+2, width+2); + auto temperature_host_view = ViewFArray(temperature_host.data(), height + 2, width + 2); printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature_host_view(i,i)); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature_host_view(i, i)); } printf("\n"); } diff --git a/examples/laplace/main_kokkosview.cpp b/examples/laplace/main_kokkosview.cpp index d4af3713..4cb18827 100644 --- a/examples/laplace/main_kokkosview.cpp +++ b/examples/laplace/main_kokkosview.cpp @@ -1,124 +1,156 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include -const int width = 1000; -const int height = 1000; +const int width = 1000; +const int height = 1000; const double temp_tolerance = 0.01; -using view_type = Kokkos::View; +using view_type = Kokkos::View; void initialize(view_type temperature_previous); void track_progress(int iteration, view_type temperature); -int main(int argc, char* argv[]) { - Kokkos::initialize(argc, argv); - { // kokkos scope - - view_type temperature("T", height+2, width+2); - view_type temperature_previous("T_prev", height+2, width+2); - - int iteration = 1; - double worst_dt = 100; - double max_value; - - // Start measuring time - auto begin = std::chrono::high_resolution_clock::now(); - - // initialize temperature profile - initialize(temperature_previous); - - while (worst_dt > temp_tolerance) { - // finite difference - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature(i,j) = 0.25 * (temperature_previous(i+1,j) - + temperature_previous(i-1,j) - + temperature_previous(i,j+1) - + temperature_previous(i,j-1)); - }); - - // calculate max difference between temperature and temperature_previous - Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value){ - double value = fabs(temperature(i,j) - temperature_previous(i,j)); - if(value > loc_max_value) loc_max_value = value; - }, - Kokkos::Max(max_value) - ); - worst_dt = max_value; - - // update temperature_previous - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({1,1}, {height+1,width+1}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = temperature(i,j); - }); - - // track progress - if (iteration % 100 == 0) { - track_progress(iteration, temperature); - } - - iteration++; - } - - // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end - begin); - - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - printf("\nMax error at iteration %d was %f\n", iteration-1, worst_dt); - - } // end kokkos scope - Kokkos::finalize(); - - return 0; +int main(int argc, char* argv[]) +{ + Kokkos::initialize(argc, argv); + { // kokkos scope + view_type temperature("T", height + 2, width + 2); + view_type temperature_previous("T_prev", height + 2, width + 2); + + int iteration = 1; + double worst_dt = 100; + double max_value; + + // Start measuring time + auto begin = std::chrono::high_resolution_clock::now(); + + // initialize temperature profile + initialize(temperature_previous); + + while (worst_dt > temp_tolerance) { + // finite difference + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature(i, j) = 0.25 * (temperature_previous(i + 1, j) + + temperature_previous(i - 1, j) + + temperature_previous(i, j + 1) + + temperature_previous(i, j - 1)); + }); + + // calculate max difference between temperature and temperature_previous + Kokkos::parallel_reduce( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j, double& loc_max_value) { + double value = fabs(temperature(i, j) - temperature_previous(i, j)); + if (value > loc_max_value) { + loc_max_value = value; + } + },Kokkos::Max(max_value)); + worst_dt = max_value; + + // update temperature_previous + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 1, 1 }, { height + 1, width + 1 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = temperature(i, j); + }); + + // track progress + if (iteration % 100 == 0) { + track_progress(iteration, temperature); + } + + iteration++; + } + + // Stop measuring time and calculate the elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); + printf("\nMax error at iteration %d was %f\n", iteration - 1, worst_dt); + } // end kokkos scope + + Kokkos::finalize(); + + return 0; } - -void initialize(view_type temperature_previous) { - //int i, j; - - // initialize temperature_previous to 0.0 - Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0}, {height+2,width+2}), - KOKKOS_LAMBDA(const int i, const int j){ - temperature_previous(i,j) = 0.0; - }); - - // setting the left and right boundary conditions - Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,height+2), - KOKKOS_LAMBDA(const int i){ - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; - }); - - // setting the top and bottom boundary condition - Kokkos::parallel_for( - Kokkos::RangePolicy<>(0,width+2), - KOKKOS_LAMBDA(const int j){ - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; - - }); +void initialize(view_type temperature_previous) +{ + // int i, j; + + // initialize temperature_previous to 0.0 + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 0, 0 }, { height + 2, width + 2 }), + KOKKOS_LAMBDA(const int i, const int j) { + temperature_previous(i, j) = 0.0; + }); + + // setting the left and right boundary conditions + Kokkos::parallel_for( + Kokkos::RangePolicy<>(0, height + 2), + KOKKOS_LAMBDA(const int i) { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; + }); + + // setting the top and bottom boundary condition + Kokkos::parallel_for( + Kokkos::RangePolicy<>(0, width + 2), + KOKKOS_LAMBDA(const int j) { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; + }); } +void track_progress(int iteration, view_type temperature) +{ + int i; -void track_progress(int iteration, view_type temperature) { - int i; - - // make a deep copy of temperature from device to host - view_type::HostMirror host_temperature = Kokkos::create_mirror_view(temperature); - Kokkos::deep_copy(host_temperature, temperature); + // make a deep copy of temperature from device to host + view_type::HostMirror host_temperature = Kokkos::create_mirror_view(temperature); + Kokkos::deep_copy(host_temperature, temperature); - printf("---------- Iteration number: %d ----------\n", iteration); - for (i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, host_temperature(i,i)); - } - printf("\n"); + printf("---------- Iteration number: %d ----------\n", iteration); + for (i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, host_temperature(i, i)); + } + printf("\n"); } diff --git a/examples/laplaceMPI/laplace_mpi.cpp b/examples/laplaceMPI/laplace_mpi.cpp index 64f982a8..b5ddfc87 100644 --- a/examples/laplaceMPI/laplace_mpi.cpp +++ b/examples/laplaceMPI/laplace_mpi.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -8,7 +41,7 @@ // Dont change ROOT #define ROOT 0 -//---------------- +// ---------------- // Change to 0 or 1 as needed #define TRACK_PROGRESS 0 @@ -21,356 +54,352 @@ using namespace mtr; // matar namespace -int width = 1000; -int height = 1000; -int max_num_iterations = 1000; -double temp_tolerance = 0.01; +int width = 1000; +int height = 1000; +int max_num_iterations = 1000; +double temp_tolerance = 0.01; -void initialize(DCArrayKokkos &temperature_previous, int height, int width); -void track_progress(int iteration, DCArrayKokkos &temperature); -void parse_command_line(int argc, char *argv[]); +void initialize(DCArrayKokkos& temperature_previous, int height, int width); +void track_progress(int iteration, DCArrayKokkos& temperature); +void parse_command_line(int argc, char* argv[]); - -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { - - MPI_Init(&argc, &argv); - Kokkos::initialize(argc, argv); - { // kokkos scope - - // Parse command line options - parse_command_line(argc, argv); - - // start timing total code - double begin_time_total = MPI_Wtime(); - - int world_size, - rank, - width_loc, - height_loc, - size_loc; - - CArray all_size_loc; - CArray offsets; - - // get world_size and rank - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - - // divide work along the height - // Note: +2 is added for boundary - width_loc = width+2; - height_loc = (height+2) / world_size; - if (rank < ((height+2) % world_size)) { - height_loc++; - } - size_loc = width_loc * height_loc; - - // root should keep an array of size_loc and offset - // for all processes - if (rank == ROOT) { - all_size_loc = CArray (world_size); - offsets = CArray (world_size); - - all_size_loc(ROOT) = size_loc; - for (int i = 1; i < world_size; i++) { - MPI_Recv(&all_size_loc(i), 1, MPI_INT, i, + MPI_Init(&argc, &argv); + Kokkos::initialize(argc, argv); + { // kokkos scope + // Parse command line options + parse_command_line(argc, argv); + + // start timing total code + double begin_time_total = MPI_Wtime(); + + int world_size; + int rank; + int width_loc; + int height_loc; + int size_loc; + + CArray all_size_loc; + CArray offsets; + + // get world_size and rank + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + // divide work along the height + // Note: +2 is added for boundary + width_loc = width + 2; + height_loc = (height + 2) / world_size; + if (rank < ((height + 2) % world_size)) { + height_loc++; + } + size_loc = width_loc * height_loc; + + // root should keep an array of size_loc and offset + // for all processes + if (rank == ROOT) { + all_size_loc = CArray(world_size); + offsets = CArray(world_size); + + all_size_loc(ROOT) = size_loc; + for (int i = 1; i < world_size; i++) { + MPI_Recv(&all_size_loc(i), 1, MPI_INT, i, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - } - - offsets(0) = 0; - for (int i = 1; i < world_size; i++) { - offsets(i) = offsets(i-1) + all_size_loc(i-1); - } - - } else { - MPI_Send(&size_loc, 1, MPI_INT, ROOT, 999, MPI_COMM_WORLD); - } - - // declare arrays - CArrayKokkos temperature_loc; - DCArrayKokkos temperature_previous_loc; - DCArrayKokkos temperature_previous_glob; - - // define and allocate arrays - temperature_loc = CArrayKokkos (height_loc, width_loc); - temperature_previous_loc = DCArrayKokkos (height_loc, width_loc); - if (rank == ROOT) { - temperature_previous_glob = DCArrayKokkos (height+2, width+2); - } - - // initialize temperature field. - if (rank == ROOT) { - initialize(temperature_previous_glob, height, width); - } - - // distribut work to all processes - if (rank == ROOT) { - temperature_previous_glob.update_host(); - MPI_Scatterv(&temperature_previous_glob.host(0,0), &all_size_loc(0), &offsets(0), - MPI_DOUBLE, &temperature_previous_loc.host(0,0), size_loc, MPI_DOUBLE, + } + + offsets(0) = 0; + for (int i = 1; i < world_size; i++) { + offsets(i) = offsets(i - 1) + all_size_loc(i - 1); + } + } + else { + MPI_Send(&size_loc, 1, MPI_INT, ROOT, 999, MPI_COMM_WORLD); + } + + // declare arrays + CArrayKokkos temperature_loc; + DCArrayKokkos temperature_previous_loc; + DCArrayKokkos temperature_previous_glob; + + // define and allocate arrays + temperature_loc = CArrayKokkos(height_loc, width_loc); + temperature_previous_loc = DCArrayKokkos(height_loc, width_loc); + if (rank == ROOT) { + temperature_previous_glob = DCArrayKokkos(height + 2, width + 2); + } + + // initialize temperature field. + if (rank == ROOT) { + initialize(temperature_previous_glob, height, width); + } + + // distribut work to all processes + if (rank == ROOT) { + temperature_previous_glob.update_host(); + MPI_Scatterv(&temperature_previous_glob.host(0, 0), &all_size_loc(0), &offsets(0), + MPI_DOUBLE, &temperature_previous_loc.host(0, 0), size_loc, MPI_DOUBLE, ROOT, MPI_COMM_WORLD); - } else { - MPI_Scatterv(NULL, NULL, NULL, MPI_DOUBLE, &temperature_previous_loc.host(0,0), size_loc, + } + else { + MPI_Scatterv(NULL, NULL, NULL, MPI_DOUBLE, &temperature_previous_loc.host(0, 0), size_loc, MPI_DOUBLE, ROOT, MPI_COMM_WORLD); - } - // - temperature_previous_loc.update_device(); - - // define neighbours - int up = rank-1; - int down = rank+1; - int up_tag = 888; - int down_tag = 999; - int nrequests; - MPI_Request requests_send[4]; - MPI_Request requests_recv[4]; - - DCArrayKokkos halo_up, halo_down; - DCArrayKokkos halo_up_out, halo_down_out; - if (up != -1) { - halo_up = DCArrayKokkos (width_loc); - halo_up_out = DCArrayKokkos (width_loc); - } - if (down != world_size) { - halo_down = DCArrayKokkos (width_loc); - halo_down_out = DCArrayKokkos (width_loc); - } - - int height_index_start, height_index_end; - if (rank == 0) { - height_index_start = 1; - } else { - height_index_start = 0; - } - if (rank == world_size-1) { - height_index_end = height_loc-1; - } else { - height_index_end = height_loc; - } - - // - int iteration = 1; - double worst_dt = 100.0; - double worst_dt_loc; - - double begin_time_main_loop = MPI_Wtime(); - // main loop - while (worst_dt > temp_tolerance && iteration <= max_num_iterations) { + } + // + temperature_previous_loc.update_device(); + + // define neighbours + int up = rank - 1; + int down = rank + 1; + int up_tag = 888; + int down_tag = 999; + int nrequests; + MPI_Request requests_send[4]; + MPI_Request requests_recv[4]; + + DCArrayKokkos halo_up, halo_down; + DCArrayKokkos halo_up_out, halo_down_out; + if (up != -1) { + halo_up = DCArrayKokkos(width_loc); + halo_up_out = DCArrayKokkos(width_loc); + } + if (down != world_size) { + halo_down = DCArrayKokkos(width_loc); + halo_down_out = DCArrayKokkos(width_loc); + } + + int height_index_start, height_index_end; + if (rank == 0) { + height_index_start = 1; + } + else { + height_index_start = 0; + } + if (rank == world_size - 1) { + height_index_end = height_loc - 1; + } + else { + height_index_end = height_loc; + } + + // + int iteration = 1; + double worst_dt = 100.0; + double worst_dt_loc; + + double begin_time_main_loop = MPI_Wtime(); + // main loop + while (worst_dt > temp_tolerance && iteration <= max_num_iterations) { #if !defined GPU - // communicate halo nodes - nrequests = 0; - if (up != -1) { - MPI_Irecv(halo_up.device_pointer(), halo_up.size(), MPI_DOUBLE, + // communicate halo nodes + nrequests = 0; + if (up != -1) { + MPI_Irecv(halo_up.device_pointer(), halo_up.size(), MPI_DOUBLE, up, up_tag, MPI_COMM_WORLD, &requests_recv[nrequests]); - MPI_Isend(temperature_previous_loc.device_pointer()+(0+(0*width_loc)), - halo_up_out.size(), MPI_DOUBLE, up, down_tag, MPI_COMM_WORLD, + MPI_Isend(temperature_previous_loc.device_pointer() + (0 + (0 * width_loc)), + halo_up_out.size(), MPI_DOUBLE, up, down_tag, MPI_COMM_WORLD, &requests_send[nrequests]); - nrequests++; - } + nrequests++; + } - if (down != world_size) { - MPI_Irecv(halo_down.device_pointer(), halo_down.size(), MPI_DOUBLE, + if (down != world_size) { + MPI_Irecv(halo_down.device_pointer(), halo_down.size(), MPI_DOUBLE, down, down_tag, MPI_COMM_WORLD, &requests_recv[nrequests]); - MPI_Isend(temperature_previous_loc.device_pointer()+(0+((height_loc-1)*width_loc)), - halo_down_out.size(), MPI_DOUBLE, down, up_tag, MPI_COMM_WORLD, + MPI_Isend(temperature_previous_loc.device_pointer() + (0 + ((height_loc - 1) * width_loc)), + halo_down_out.size(), MPI_DOUBLE, down, up_tag, MPI_COMM_WORLD, &requests_send[nrequests]); - nrequests++; - } + nrequests++; + } #else - // fill halo with data - if (up != -1) { - FOR_ALL(j, 0, width_loc, { - halo_up_out(j) = temperature_previous_loc(0,j); - }); - halo_up_out.update_host(); - } - - if (down != world_size) { - FOR_ALL(j, 0, width_loc, { - halo_down_out(j) = temperature_previous_loc(height_loc-1, j); - }); - halo_up_out.update_host(); - } - // - Kokkos::fence(); - - // communicate halo nodes - nrequests = 0; - if (up != -1) { - MPI_Irecv(halo_up.host_pointer(), halo_up.size(), MPI_DOUBLE, + // fill halo with data + if (up != -1) { + FOR_ALL(j, 0, width_loc, { + halo_up_out(j) = temperature_previous_loc(0, j); + }); + halo_up_out.update_host(); + } + + if (down != world_size) { + FOR_ALL(j, 0, width_loc, { + halo_down_out(j) = temperature_previous_loc(height_loc - 1, j); + }); + halo_up_out.update_host(); + } + // + Kokkos::fence(); + + // communicate halo nodes + nrequests = 0; + if (up != -1) { + MPI_Irecv(halo_up.host_pointer(), halo_up.size(), MPI_DOUBLE, up, up_tag, MPI_COMM_WORLD, &requests_recv[nrequests]); - MPI_Isend(halo_up_out.host_pointer(), halo_up_out.size(), MPI_DOUBLE, + MPI_Isend(halo_up_out.host_pointer(), halo_up_out.size(), MPI_DOUBLE, up, down_tag, MPI_COMM_WORLD, &requests_send[nrequests]); - nrequests++; - } + nrequests++; + } - if (down != world_size) { - MPI_Irecv(halo_down.host_pointer(), halo_down.size(), MPI_DOUBLE, + if (down != world_size) { + MPI_Irecv(halo_down.host_pointer(), halo_down.size(), MPI_DOUBLE, down, down_tag, MPI_COMM_WORLD, &requests_recv[nrequests]); - MPI_Isend(halo_down_out.host_pointer(), halo_down_out.size(), MPI_DOUBLE, + MPI_Isend(halo_down_out.host_pointer(), halo_down_out.size(), MPI_DOUBLE, down, up_tag, MPI_COMM_WORLD, &requests_send[nrequests]); - nrequests++; - } -#endif - - // finite difference for internal nodes - FOR_ALL(i, 1, height_loc-1, - j, 1, width_loc-1, { - temperature_loc(i,j) = 0.25 * (temperature_previous_loc(i+1,j) - + temperature_previous_loc(i-1,j) - + temperature_previous_loc(i,j+1) - + temperature_previous_loc(i,j-1)); - }); - - // Wait for all halo exchange to complete - if (nrequests > 0) { - MPI_Waitall(nrequests, requests_send, MPI_STATUSES_IGNORE); - MPI_Waitall(nrequests, requests_recv, MPI_STATUSES_IGNORE); - } + nrequests++; + } +#endif - // finite difference on surface nodes - if (up != -1) { + // finite difference for internal nodes + FOR_ALL(i, 1, height_loc - 1, + j, 1, width_loc - 1, { + temperature_loc(i, j) = 0.25 * (temperature_previous_loc(i + 1, j) + + temperature_previous_loc(i - 1, j) + + temperature_previous_loc(i, j + 1) + + temperature_previous_loc(i, j - 1)); + }); + + // Wait for all halo exchange to complete + if (nrequests > 0) { + MPI_Waitall(nrequests, requests_send, MPI_STATUSES_IGNORE); + MPI_Waitall(nrequests, requests_recv, MPI_STATUSES_IGNORE); + } + + // finite difference on surface nodes + if (up != -1) { #if defined GPU - halo_up.update_device(); + halo_up.update_device(); #endif - int i = 0; - FOR_ALL(j, 1, width_loc-1, { - temperature_loc(i,j) = 0.25 * (temperature_previous_loc(i+1,j) - + halo_up(j) - + temperature_previous_loc(i,j+1) - + temperature_previous_loc(i,j-1)); - }); - } // end if (up != -1) - - if (down != world_size) { + int i = 0; + FOR_ALL(j, 1, width_loc - 1, { + temperature_loc(i, j) = 0.25 * (temperature_previous_loc(i + 1, j) + + halo_up(j) + + temperature_previous_loc(i, j + 1) + + temperature_previous_loc(i, j - 1)); + }); + } // end if (up != -1) + + if (down != world_size) { #if defined GPU - halo_down.update_device(); + halo_down.update_device(); #endif - int i = height_loc-1; - FOR_ALL(j, 1, width_loc-1, { - temperature_loc(i,j) = 0.25 * (halo_down(j) - + temperature_previous_loc(i-1,j) - + temperature_previous_loc(i,j+1) - + temperature_previous_loc(i,j-1)); - }); - } // end if (down != world_size) - - // calculate max difference between temperature and temperature_previous - double loc_max_value = 100.0; - REDUCE_MAX(i, height_index_start, height_index_end, - j, 1, width_loc-1, + int i = height_loc - 1; + FOR_ALL(j, 1, width_loc - 1, { + temperature_loc(i, j) = 0.25 * (halo_down(j) + + temperature_previous_loc(i - 1, j) + + temperature_previous_loc(i, j + 1) + + temperature_previous_loc(i, j - 1)); + }); + } // end if (down != world_size) + + // calculate max difference between temperature and temperature_previous + double loc_max_value = 100.0; + REDUCE_MAX(i, height_index_start, height_index_end, + j, 1, width_loc - 1, loc_max_value, { - double value = fabs(temperature_loc(i,j) - temperature_previous_loc(i,j)); - if (value > loc_max_value) loc_max_value = value; - }, worst_dt_loc); - - // update temperature_previous - FOR_ALL(i, height_index_start, height_index_end, - j, 1, width_loc-1, { - temperature_previous_loc(i,j) = temperature_loc(i,j); - }); + double value = fabs(temperature_loc(i, j) - temperature_previous_loc(i, j)); + if (value > loc_max_value) { + loc_max_value = value; + } + }, worst_dt_loc); - // wait for all kokkos kernals to complete - Kokkos::fence(); + // update temperature_previous + FOR_ALL(i, height_index_start, height_index_end, + j, 1, width_loc - 1, { + temperature_previous_loc(i, j) = temperature_loc(i, j); + }); - // all reduce for worst_dt - MPI_Allreduce(&worst_dt_loc, &worst_dt, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + // wait for all kokkos kernals to complete + Kokkos::fence(); + // all reduce for worst_dt + MPI_Allreduce(&worst_dt_loc, &worst_dt, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); #if TRACK_PROGRESS - // track progress - if (iteration % 100 == 0) { - - temperature_previous_loc.update_host(); + // track progress + if (iteration % 100 == 0) { + temperature_previous_loc.update_host(); - if (rank == ROOT) { - MPI_Gatherv(&temperature_previous_loc.host(0,0), size_loc, MPI_DOUBLE, - &temperature_previous_glob.host(0,0), &all_size_loc(0), &offsets(0), + if (rank == ROOT) { + MPI_Gatherv(&temperature_previous_loc.host(0, 0), size_loc, MPI_DOUBLE, + &temperature_previous_glob.host(0, 0), &all_size_loc(0), &offsets(0), MPI_DOUBLE, ROOT, MPI_COMM_WORLD); - - } else { - - MPI_Gatherv(&temperature_previous_loc.host(0,0), size_loc, MPI_DOUBLE, + } + else { + MPI_Gatherv(&temperature_previous_loc.host(0, 0), size_loc, MPI_DOUBLE, NULL, NULL, NULL, MPI_DOUBLE, ROOT, MPI_COMM_WORLD); - } - - if (rank == ROOT) { - track_progress(iteration, temperature_previous_glob); - } + } - } // end if (iteration % 100 == 0) + if (rank == ROOT) { + track_progress(iteration, temperature_previous_glob); + } + } // end if (iteration % 100 == 0) #endif - iteration++; - } // end while loop - - // stop timing - double end_time = MPI_Wtime(); - - if (rank == ROOT) { - printf("\n"); - printf("Number of MPI processes = %d\n", world_size); - printf("height = %d; width = %d\n", height, width); - printf("Total code time was %10.6e seconds.\n", end_time-begin_time_total); - printf("Main loop time was %10.6e seconds.\n", end_time-begin_time_main_loop); - printf("Max error at iteration %d was %10.6e\n", iteration-1, worst_dt); - } - - - } // end kokkos scope - Kokkos::finalize(); - MPI_Finalize(); - return 0; + iteration++; + } // end while loop + + // stop timing + double end_time = MPI_Wtime(); + + if (rank == ROOT) { + printf("\n"); + printf("Number of MPI processes = %d\n", world_size); + printf("height = %d; width = %d\n", height, width); + printf("Total code time was %10.6e seconds.\n", end_time - begin_time_total); + printf("Main loop time was %10.6e seconds.\n", end_time - begin_time_main_loop); + printf("Max error at iteration %d was %10.6e\n", iteration - 1, worst_dt); + } + } // end kokkos scope + Kokkos::finalize(); + MPI_Finalize(); + return 0; } +void initialize(DCArrayKokkos& temperature_previous, int height, int width) +{ + // initialize temperature_previous to 0.0 + FOR_ALL(i, 0, height + 2, + j, 0, width + 2, { + temperature_previous(i, j) = 0.0; + }); -void initialize(DCArrayKokkos &temperature_previous, int height, int width) { - // initialize temperature_previous to 0.0 - FOR_ALL(i, 0, height+2, - j, 0, width+2, { - temperature_previous(i,j) = 0.0; - }); - - // setting the left and right boundary conditions - FOR_ALL(i, 0, height+2, { - temperature_previous(i,0) = 0.0; - temperature_previous(i,width+1) = (100.0/height)*i; - }); - - // setting the top and bottom boundary condition - FOR_ALL(j, 0, width+2, { - temperature_previous(0,j) = 0.0; - temperature_previous(height+1,j) = (100.0/width)*j; + // setting the left and right boundary conditions + FOR_ALL(i, 0, height + 2, { + temperature_previous(i, 0) = 0.0; + temperature_previous(i, width + 1) = (100.0 / height) * i; + }); - }); + // setting the top and bottom boundary condition + FOR_ALL(j, 0, width + 2, { + temperature_previous(0, j) = 0.0; + temperature_previous(height + 1, j) = (100.0 / width) * j; + }); } -void track_progress(int iteration, DCArrayKokkos &temperature) { - - printf("---------- Iteration number: %d ----------\n", iteration); - for (int i = height-5; i <= height; i++) { - printf("[%d,%d]: %5.2f ", i,i, temperature.host(i,i)); - } - printf("\n"); +void track_progress(int iteration, DCArrayKokkos& temperature) +{ + printf("---------- Iteration number: %d ----------\n", iteration); + for (int i = height - 5; i <= height; i++) { + printf("[%d,%d]: %5.2f ", i, i, temperature.host(i, i)); + } + printf("\n"); } -void parse_command_line(int argc, char *argv[]) +void parse_command_line(int argc, char* argv[]) { - std::string opt; - int i = 1; - while (i < argc && argv[i][0] == '-') - { - opt = std::string(argv[i]); + std::string opt; + int i = 1; + while (i < argc && argv[i][0] == '-') + { + opt = std::string(argv[i]); - if(opt == "-height") - height = atoi(argv[++i]); + if (opt == "-height") { + height = atoi(argv[++i]); + } - if(opt == "-width") - width = atoi(argv[++i]); + if (opt == "-width") { + width = atoi(argv[++i]); + } - ++i; - } + ++i; + } } diff --git a/examples/main.cpp b/examples/main.cpp index 57434f3a..fa16a887 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -1,16 +1,49 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "matar.h" using namespace mtr; // matar namespace -int main() { - +int main() +{ printf("Hello World\n"); - auto test = CArray (5, 5); + auto test = CArray(5, 5); - test(3,3) = 10; + test(3, 3) = 10; printf("Succesfully made and used a CArray\n"); diff --git a/examples/main_kokkos.cpp b/examples/main_kokkos.cpp index 957ae981..e3137e06 100644 --- a/examples/main_kokkos.cpp +++ b/examples/main_kokkos.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -8,239 +41,215 @@ using namespace mtr; // matar namespace // helper type for selecting variant type set by user -template struct overloaded : Ts... { using Ts::operator()...; }; +template struct overloaded : Ts ... { using Ts::operator() ...; }; // explicit deduction guide (not needed as of C++20) -template overloaded(Ts...) -> overloaded; - - +template overloaded(Ts ...)->overloaded; // A notional class -class Data { +class Data +{ private: u_int nx_; u_int ny_; u_int nz_; - - CArrayKokkos arr3D_; - + + CArrayKokkos arr3D_; + public: - + // default constructor Data(); - + // overload constructor to set dimensions Data(u_int nx, u_int ny, u_int nz); - + void some_fcn(); - }; // end class Data -Data::Data(){}; +Data::Data() +{ +}; -Data::Data(u_int nx, u_int ny, u_int nz){ - +Data::Data(u_int nx, u_int ny, u_int nz) +{ nx_ = nx; ny_ = ny; nz_ = nz; - - arr3D_ = CArrayKokkos (nx_, ny_, nz_); -}; + arr3D_ = CArrayKokkos(nx_, ny_, nz_); +}; -void Data::some_fcn(){ - +void Data::some_fcn() +{ // parallel loop inside a class // The KOKKOS_CLASS_LAMBDA is [=, *this]. The *this in the lambda // capture gives access to the class data Kokkos::parallel_for("3DCArray", - Kokkos::MDRangePolicy>({0,0,0}, {nx_, ny_, nz_}), + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx_, ny_, nz_ }), KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { - int idx = (i-1) * nx_ * ny_ + (j-1) * nz_ + (k-1); + int idx = (i - 1) * nx_ * ny_ + (j - 1) * nz_ + (k - 1); arr3D_(i, j, k) = idx; - }); + }); Kokkos::fence(); - + // now using the macros for a parallel loop inside a class FOR_ALL_CLASS(i, 0, nx_, j, 0, ny_, k, 0, nz_, { - - int idx = (i-1) * nx_ * ny_ + (j-1) * nz_ + (k-1); - arr3D_(i, j, k) = idx; - //printf("\nloop\n"); - }); + int idx = (i - 1) * nx_ * ny_ + (j - 1) * nz_ + (k - 1); + arr3D_(i, j, k) = idx; + // printf("\nloop\n"); + }); Kokkos::fence(); - + RUN_CLASS({ printf("inside RUN_CLASS\n"); }); - }; // end member function - - // functions called INSIDE a kokkos parallel loop KOKKOS_INLINE_FUNCTION -void pass_by_ref(const FMatrixKokkos &matrix); +void pass_by_ref(const FMatrixKokkos& matrix); KOKKOS_INLINE_FUNCTION -void pass_view_by_ref(const ViewFMatrixKokkos &matrix); +void pass_view_by_ref(const ViewFMatrixKokkos& matrix); KOKKOS_INLINE_FUNCTION void pass_by_val(const FMatrixKokkos matrix); - - // functions NOT called in a kokkos parallel loop -void pass_by_ref_two(const FMatrixKokkos &matrix); - -FMatrixKokkos return_by_val(); - - +void pass_by_ref_two(const FMatrixKokkos& matrix); +FMatrixKokkos return_by_val(); // function objects ModelA and ModelB with parallel loops inside -class ModelA{ - +class ModelA +{ private: - // initial variables - +// initial variables + public: // default constructor - ModelA (){}; - + ModelA () {}; + // overload constructor to set initial values - + // overload() - void operator()(FMatrixKokkos &matrix){ - - printf("inside ModelA \n"); - + void operator()(FMatrixKokkos& matrix) + { + printf("inside ModelA \n"); + int loc_sum; int val = 0; - + // NOTE: if private vars are accessed, requires REDUCE_SUM_CLASS // do summation in parallel on GPU REDUCE_SUM_CLASS(k, 1, 6, j, 1, 5, i, 1, 4, loc_sum, { - loc_sum += matrix(i,j,k,1); - }, val); - - Kokkos::fence(); + loc_sum += matrix(i, j, k, 1); + }, val); + + Kokkos::fence(); printf(" val = %i \n", val); - - } // end overload - + } // end overload }; // end function object ModelA - - -class ModelB{ - +class ModelB +{ private: - // initial variables - +// initial variables + public: // default constructor - ModelB (){}; - + ModelB () {}; + // overload constructor to set initial values - + // overload() - void operator()(FMatrixKokkos &matrix){ - - printf("inside ModelB \n"); - + void operator()(FMatrixKokkos& matrix) + { + printf("inside ModelB \n"); + int loc_sum; int val = 0; - + // NOTE: if private vars are accessed, requires REDUCE_SUM_CLASS // do summation in parallel on GPU REDUCE_SUM_CLASS(k, 1, 6, j, 1, 5, i, 1, 4, loc_sum, { - loc_sum += matrix(i,j,k,1); - }, val); - - Kokkos::fence(); + loc_sum += matrix(i, j, k, 1); + }, val); + + Kokkos::fence(); printf(" val = %i \n", val); - - } // end overload - + } // end overload }; // end function object ModelB using models = std::variant; - - // function objects called inside a parallel loop -class MethodA{ - +class MethodA +{ private: - // initial variables - +// initial variables + public: // default constructor - MethodA (){}; - + MethodA () {}; + // overload constructor to set initial values - + // overload() KOKKOS_INLINE_FUNCTION - void operator()(const FMatrixKokkos &matrix) const{ - - printf("inside MethodA \n"); - - int idx = matrix(1,1,1,1); - - matrix(1,1,1,1) = idx; // do something pointless + void operator()(const FMatrixKokkos& matrix) const + { + printf("inside MethodA \n"); + int idx = matrix(1, 1, 1, 1); + + matrix(1, 1, 1, 1) = idx; // do something pointless } // end overload - }; // end function object MethodA -class MethodB{ - +class MethodB +{ private: - // initial variables - +// initial variables + public: // default constructor - MethodB (){}; - + MethodB () {}; + // overload constructor to set initial values - + // overload() KOKKOS_INLINE_FUNCTION - void operator()(const FMatrixKokkos &matrix) const{ - - printf("inside MethodB \n"); - - int idx = matrix(1,1,1,1); - - matrix(1,1,1,1) = idx; // do something pointless + void operator()(const FMatrixKokkos& matrix) const + { + printf("inside MethodB \n"); + + int idx = matrix(1, 1, 1, 1); + matrix(1, 1, 1, 1) = idx; // do something pointless } // end overload - }; // end function object MethodB - using methods = std::variant; - /* using my_variant = std::variant; @@ -263,528 +272,479 @@ void foo(my_variant &v) { } */ - -template +template KOKKOS_INLINE_FUNCTION void run_methods(const F1, const F2, methods); - - // enum namespace choices { - enum myChoice - { - METHOD_A = 1, - METHOD_B = 2, - METHOD_C = 3 - }; -} - - +enum myChoice +{ + METHOD_A = 1, + METHOD_B = 2, + METHOD_C = 3 +}; +} // namespace choices // function pointer -template -struct method_ptrs{ - void (*fcn_ptr)(const T); +template +struct method_ptrs +{ + void (*fcn_ptr)(const T); }; -template +template KOKKOS_INLINE_FUNCTION -void sum(const T){ - printf("inside sum function\n"); +void sum(const T) +{ + printf("inside sum function\n"); }; -template +template KOKKOS_INLINE_FUNCTION -void multiply(const T){ - printf("inside multiply function\n"); +void multiply(const T) +{ + printf("inside multiply function\n"); }; - - // struct that stores data inside -struct code_data_t{ - double field_one[100]; - int field_two[200]; +struct code_data_t +{ + double field_one[100]; + int field_two[200]; }; - // a struct that stores MATAR dual arrays inside -struct cell_data_t{ +struct cell_data_t +{ + DCArrayKokkos den; + DCArrayKokkos pres; - DCArrayKokkos den; - DCArrayKokkos pres; - - KOKKOS_INLINE_FUNCTION - void initialize(const int i, const int j, const int k) const{ - den(i,j,k) = 0.0; - pres(i,j,k) = 0.0; + void initialize(const int i, const int j, const int k) const + { + den(i, j, k) = 0.0; + pres(i, j, k) = 0.0; }; - }; - // data in an exisiting framework that is managed -struct framework_data_t{ - +struct framework_data_t +{ // a 10 X 10 X 10 mesh int dim1 = 10; int dim2 = 10; int dim3 = 10; - + double data1[1000]; // notional data, could be dynammically allocated double data2[1000]; // ... - }; // view of data in an exisiting framework -struct framework_matar_t{ - - DViewCArrayKokkos data1; // Views of the notional data on CPU and GPU - DViewCArrayKokkos data2; // ... - +struct framework_matar_t +{ + DViewCArrayKokkos data1; // Views of the notional data on CPU and GPU + DViewCArrayKokkos data2; // ... }; - - - -//============================================================= +// ============================================================= // // Main function // -int main(int argc, char *argv[]) { - - +int main(int argc, char* argv[]) +{ Kokkos::initialize(argc, argv); - { - + { // ----------------------- // parameters for examples // ----------------------- u_int size_i, size_j, size_k, size_l; size_i = 3; size_j = 4; size_k = 5; size_l = 6; - + policy1D Arr_policy_1d = policy1D(0, size_i); - policy2D Arr_policy_2d = policy2D({0, 0}, {size_i, size_j}); - policy3D Arr_policy_3d = policy3D({0, 0, 0}, {size_i, size_j, size_k}); - policy4D Arr_policy_4d = policy4D({0, 0, 0, 0}, {size_i, size_j, size_k, size_l}); - - policy1D Mtx_policy_1d = policy1D(1, size_i+1); - policy2D Mtx_policy_2d = policy2D({1, 1}, {size_i+1, size_j+1}); - policy3D Mtx_policy_3d = policy3D({1, 1, 1}, {size_i+1, size_j+1, size_k+1}); - policy4D Mtx_policy_4d = policy4D({1, 1, 1, 1}, {size_i+1, size_j+1, size_k+1, size_l+1}); - - + policy2D Arr_policy_2d = policy2D({ 0, 0 }, { size_i, size_j }); + policy3D Arr_policy_3d = policy3D({ 0, 0, 0 }, { size_i, size_j, size_k }); + policy4D Arr_policy_4d = policy4D({ 0, 0, 0, 0 }, { size_i, size_j, size_k, size_l }); + + policy1D Mtx_policy_1d = policy1D(1, size_i + 1); + policy2D Mtx_policy_2d = policy2D({ 1, 1 }, { size_i + 1, size_j + 1 }); + policy3D Mtx_policy_3d = policy3D({ 1, 1, 1 }, { size_i + 1, size_j + 1, size_k + 1 }); + policy4D Mtx_policy_4d = policy4D({ 1, 1, 1, 1 }, { size_i + 1, size_j + 1, size_k + 1, size_l + 1 }); + // ----------------------- // CArray // ----------------------- - + printf("\n1D CArray\n"); - auto cak1D = CArrayKokkos (size_i); - + auto cak1D = CArrayKokkos(size_i); + // a parallel 1D loop Kokkos::parallel_for("1DCArray", Arr_policy_1d, KOKKOS_LAMBDA(const int i) { cak1D(i) = i; - //printf("%d) %d\n", i, cak1D(i)); + // printf("%d) %d\n", i, cak1D(i)); }); Kokkos::fence(); - + // the marco for a parallel 1D loop FOR_ALL(i, 0, size_i, { cak1D(i) = i; }); - + Kokkos::fence(); - + // ----------------------- // FArray // ----------------------- - + printf("\n2D FArray\n"); - auto fak2D = FArrayKokkos (size_i, size_j); + auto fak2D = FArrayKokkos(size_i, size_j); Kokkos::parallel_for("2DFArray", Arr_policy_2d, KOKKOS_LAMBDA(const int i, const int j) { - int idx = j * size_i + i; + int idx = j * size_i + i; fak2D(i, j) = idx; - //printf("%d) %d\n", idx, fak2D(i, j)); + // printf("%d) %d\n", idx, fak2D(i, j)); }); Kokkos::fence(); - + // the marco for a parallel 2D nested loop FOR_ALL(i, 0, size_i, j, 0, size_j, - { - int idx = j * size_i + i; - fak2D(i, j) = idx; - //printf("%d) %d\n", idx, fak2D(i, j)); - }); + { + int idx = j * size_i + i; + fak2D(i, j) = idx; + // printf("%d) %d\n", idx, fak2D(i, j)); + }); Kokkos::fence(); - - + // ----------------------- // CMatrix // ----------------------- - + printf("\n3D CMatrix\n"); - auto cmk3D = CMatrixKokkos (size_i, size_j, size_k); - printf("made 3D CMatrix\n"); + auto cmk3D = CMatrixKokkos(size_i, size_j, size_k); + printf("made 3D CMatrix\n"); printf("made CMATARkokkos\n"); Kokkos::parallel_for("3DCMatrix", Mtx_policy_3d, KOKKOS_LAMBDA(const int i, const int j, const int k) { - int idx = (i-1) * size_j * size_k + (j-1) * size_k + (k-1); + int idx = (i - 1) * size_j * size_k + (j - 1) * size_k + (k - 1); cmk3D(i, j, k) = idx; printf("%d) %d\n", i, cmk3D(i, j, k)); }); Kokkos::fence(); - - + // the marco for a parallel 3D nested loop - auto cmk3D_two = CMatrixKokkos (size_i, size_j, size_k); - FOR_ALL(i, 1, size_i+1, - j, 1, size_j+1, - k, 1, size_k+1, - { - int idx = (i-1) * size_j * size_k + (j-1) * size_k + (k-1); - cmk3D_two(i, j, k) = idx; - - //printf("index %d) CMatrix = %d and %d\n", idx, cmk3D_two(i, j, k), cmk3D(i, j, k)); - }); + auto cmk3D_two = CMatrixKokkos(size_i, size_j, size_k); + FOR_ALL(i, 1, size_i + 1, + j, 1, size_j + 1, + k, 1, size_k + 1, + { + int idx = (i - 1) * size_j * size_k + (j - 1) * size_k + (k - 1); + cmk3D_two(i, j, k) = idx; + + // printf("index %d) CMatrix = %d and %d\n", idx, cmk3D_two(i, j, k), cmk3D(i, j, k)); + }); Kokkos::fence(); - - + // ----------------------- // FMatrix // ----------------------- - + printf("\n4D FMatrix\n"); - auto fmk4D = FMatrixKokkos (size_i, size_j, size_k, size_l); - + auto fmk4D = FMatrixKokkos(size_i, size_j, size_k, size_l); + Kokkos::parallel_for("4DFMatrix", Mtx_policy_4d, KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - int idx = (l-1) * size_i * size_j * size_k + (k-1) * size_i * size_j + (j-1) * size_i + (i-1); + int idx = (l - 1) * size_i * size_j * size_k + (k - 1) * size_i * size_j + (j - 1) * size_i + (i - 1); fmk4D(i, j, k, l) = idx; }); Kokkos::fence(); - - - - // -- functions exectuted on device inside a parallel for loop --- - // A parallel loop - FOR_ALL(i,0,1,{ - pass_by_ref(fmk4D); - pass_by_val(fmk4D); - }); - Kokkos::fence(); - - - // --- call a function that has kokkos parallel loops inside it --- - - // get a FMatrix from a function - fmk4D = return_by_val(); - - // verify the values are correct - FOR_ALL(i,0,1,{ - pass_by_ref(fmk4D); - }); - Kokkos::fence(); - - - // call a function that has kokkos parallel loops inside it - pass_by_ref_two(fmk4D); - - - - // ----------------------- + + // -- functions exectuted on device inside a parallel for loop --- + // A parallel loop + FOR_ALL(i, 0, 1, { + pass_by_ref(fmk4D); + pass_by_val(fmk4D); + }); + Kokkos::fence(); + + // --- call a function that has kokkos parallel loops inside it --- + + // get a FMatrix from a function + fmk4D = return_by_val(); + + // verify the values are correct + FOR_ALL(i, 0, 1, { + pass_by_ref(fmk4D); + }); + Kokkos::fence(); + + // call a function that has kokkos parallel loops inside it + pass_by_ref_two(fmk4D); + + // ----------------------- // ViewFMatrix // ----------------------- - + printf("\n3D ViewFMatrix\n"); - - // slice off the last dimension of FMatrix at L=1 - FOR_ALL(L,1,2,{ - // MATAR views by default are on the device - auto viewfmk3D = ViewFMatrixKokkos (&fmk4D(1,1,1,L),size_i, size_j, size_k); - - // pass this view to a function - pass_view_by_ref(viewfmk3D); - }); - Kokkos::fence(); - - - - // ----------------------- + + // slice off the last dimension of FMatrix at L=1 + FOR_ALL(L, 1, 2, { + // MATAR views by default are on the device + auto viewfmk3D = ViewFMatrixKokkos(&fmk4D(1, 1, 1, L), size_i, size_j, size_k); + + // pass this view to a function + pass_view_by_ref(viewfmk3D); + }); + Kokkos::fence(); + + // ----------------------- // functors // ----------------------- - printf("\nfunctors\n"); - ModelA model_a; - model_a(fmk4D); - - // -- - MethodA method_a; - FOR_ALL(i,1,2,{ - method_a(fmk4D); - }); - Kokkos::fence(); - - - // ----------------------- + printf("\nfunctors\n"); + ModelA model_a; + model_a(fmk4D); + + // -- + MethodA method_a; + FOR_ALL(i, 1, 2, { + method_a(fmk4D); + }); + Kokkos::fence(); + + // ----------------------- // std::variant access // ----------------------- - - printf("\nstd::variant with functors\n"); - + + printf("\nstd::variant with functors\n"); + models my_model = ModelA(); // set model type - - size_t idx = my_model.index(); - printf("index of model in variant is = %lu \n",idx); - // find and execute the model selected - std::visit(overloaded { + size_t idx = my_model.index(); + printf("index of model in variant is = %lu \n", idx); + + // find and execute the model selected + std::visit(overloaded { [&fmk4D](ModelA model) { - printf("ModelA is being executed\n"); - - model(fmk4D); + printf("ModelA is being executed\n"); - }, + model(fmk4D); + }, [&fmk4D](ModelB model) { - printf("ModelB is being executed\n"); - model(fmk4D); - } + printf("ModelB is being executed\n"); + model(fmk4D); + } }, my_model); - - - printf("\nCArray of std::variants with functors\n"); - // MATAR CArray of std::variants - CArray mat_models(3); - mat_models(0) = ModelB(); // material 0 physics model - mat_models(1) = ModelA(); // material 1 physics model - mat_models(2) = ModelB(); // material 2 physics model - - idx = mat_models(0).index(); - printf("index of model in variant is = %lu \n",idx); - - for (int mat_id=0; mat_id<3; mat_id++){ - // find and execute the model selected - std::visit(overloaded { - [&fmk4D](ModelA model) { - printf("ModelA is being executed\n"); - model(fmk4D); - }, - [&fmk4D](ModelB model) { - printf("ModelB is being executed\n"); - model(fmk4D); - } + printf("\nCArray of std::variants with functors\n"); + // MATAR CArray of std::variants + CArray mat_models(3); + mat_models(0) = ModelB(); // material 0 physics model + mat_models(1) = ModelA(); // material 1 physics model + mat_models(2) = ModelB(); // material 2 physics model + + idx = mat_models(0).index(); + printf("index of model in variant is = %lu \n", idx); + + for (int mat_id = 0; mat_id < 3; mat_id++) { + // find and execute the model selected + std::visit(overloaded { + [&fmk4D](ModelA model) { + printf("ModelA is being executed\n"); + model(fmk4D); + }, + [&fmk4D](ModelB model) { + printf("ModelB is being executed\n"); + model(fmk4D); + } }, mat_models(mat_id)); - } // end of loop over materials - - - CArray mat_methods(3); - mat_methods(0) = MethodB(); // material 0 numerical method - mat_methods(1) = MethodA(); // material 1 numerical method - mat_methods(2) = MethodB(); // material 2 numerical method + } // end of loop over materials + + CArray mat_methods(3); + mat_methods(0) = MethodB(); // material 0 numerical method + mat_methods(1) = MethodA(); // material 1 numerical method + mat_methods(2) = MethodB(); // material 2 numerical method // material centric approach - for (int mat_id=0; mat_id<3; mat_id++){ - - // find and execute the model selected - std::visit(overloaded { - [&fmk4D](MethodA method) { - printf("ModelA is being executed\n"); - - // e.g., loop over the cells in the mesh in parallel - FOR_ALL(i,1,2,{ - method(fmk4D); - }); - Kokkos::fence(); - - }, - [&fmk4D](MethodB method) { - printf("ModelB is being executed\n"); - - // e.g., loop over the cells in the mesh in parallel - FOR_ALL(i,1,2,{ - method(fmk4D); - }); - Kokkos::fence(); - } + for (int mat_id = 0; mat_id < 3; mat_id++) { + // find and execute the model selected + std::visit(overloaded { + [&fmk4D](MethodA method) { + printf("ModelA is being executed\n"); + + // e.g., loop over the cells in the mesh in parallel + FOR_ALL(i, 1, 2, { + method(fmk4D); + }); + Kokkos::fence(); + }, + [&fmk4D](MethodB method) { + printf("ModelB is being executed\n"); + + // e.g., loop over the cells in the mesh in parallel + FOR_ALL(i, 1, 2, { + method(fmk4D); + }); + Kokkos::fence(); + } }, mat_methods(mat_id)); - - } // end of loop over materials - - - - - - - // ----------------------- - // DualView types - // ----------------------- - - printf("\nDual views\n"); - - code_data_t my_code_data; // struct with arrays of data - - - // create a dual view of the data held inside my_code_data struct - auto field_one = DViewCArrayKokkos (&my_code_data.field_one[0], 100); - auto field_two = DViewCArrayKokkos (&my_code_data.field_two[0], 200); - - printf("modifying the dual view fields on the device\n"); - - // modify the values in field one on the device - FOR_ALL(i,0,100,{ - field_one(i) = 12.345; - }); - Kokkos::fence(); - field_one.update_host(); // copy data from devise to the host - - printf("dual view of field_one = %f, struct field_one = %f \n", field_one.host(0), my_code_data.field_one[0]); - - // modify the values in field two on the device - FOR_ALL(i,0,200,{ - field_two(i) = 3; - }); - Kokkos::fence(); - field_two.update_host(); // copy data from devise to the host - - printf("dual view of field_two = %i, struct field_two = %i \n", field_two.host(0), my_code_data.field_two[0]); - - printf("modifying struct field_one = 314.5 \n"); - for (int i=0; i<100; i++){ - my_code_data.field_one[i] = 314.15; - } // end for loop - printf("dual view of field_one = %f, struct field_one = %f \n", field_one.host(0), my_code_data.field_one[0]); - - - - // ----------------------- - // Dual Array types in an object - // ----------------------- - - printf("\nDual types inside struct\n"); - - // struct with MATAR arrays of data - cell_data_t cell_data; // allocate the data sizes: 10X10x10 mesh - - printf("allocate dual type sizes held in struct\n"); - cell_data.den = DCArrayKokkos (10,10,10); - cell_data.pres = DCArrayKokkos (10,10,10); - - - // set the values inside the cell_data struct on the device - - printf("setting the dual type values and calling initialize functions \n"); + } // end of loop over materials + + // ----------------------- + // DualView types + // ----------------------- + + printf("\nDual views\n"); + + code_data_t my_code_data; // struct with arrays of data + + // create a dual view of the data held inside my_code_data struct + auto field_one = DViewCArrayKokkos(&my_code_data.field_one[0], 100); + auto field_two = DViewCArrayKokkos(&my_code_data.field_two[0], 200); + + printf("modifying the dual view fields on the device\n"); + + // modify the values in field one on the device + FOR_ALL(i, 0, 100, { + field_one(i) = 12.345; + }); + Kokkos::fence(); + field_one.update_host(); // copy data from devise to the host + + printf("dual view of field_one = %f, struct field_one = %f \n", field_one.host(0), my_code_data.field_one[0]); + + // modify the values in field two on the device + FOR_ALL(i, 0, 200, { + field_two(i) = 3; + }); + Kokkos::fence(); + field_two.update_host(); // copy data from devise to the host + + printf("dual view of field_two = %i, struct field_two = %i \n", field_two.host(0), my_code_data.field_two[0]); + + printf("modifying struct field_one = 314.5 \n"); + for (int i = 0; i < 100; i++) { + my_code_data.field_one[i] = 314.15; + } // end for loop + printf("dual view of field_one = %f, struct field_one = %f \n", field_one.host(0), my_code_data.field_one[0]); + + // ----------------------- + // Dual Array types in an object + // ----------------------- + + printf("\nDual types inside struct\n"); + + // struct with MATAR arrays of data + cell_data_t cell_data; // allocate the data sizes: 10X10x10 mesh + + printf("allocate dual type sizes held in struct\n"); + cell_data.den = DCArrayKokkos(10, 10, 10); + cell_data.pres = DCArrayKokkos(10, 10, 10); + + // set the values inside the cell_data struct on the device + + printf("setting the dual type values and calling initialize functions \n"); FOR_ALL(i, 0, 10, j, 0, 10, k, 0, 10, - { - cell_data.initialize(i,j,k); - - cell_data.den(i,j,k) = 3.14159; - cell_data.pres(i,j,k) = 1.0; - }); + { + cell_data.initialize(i, j, k); + + cell_data.den(i, j, k) = 3.14159; + cell_data.pres(i, j, k) = 1.0; + }); Kokkos::fence(); - - // update the host side - cell_data.den.update_host(); - cell_data.pres.update_host(); - printf("The host values of the dual CArrays in the struct = %f and %f \n", cell_data.den.host(0,0,0), cell_data.pres.host(0,0,0)); - - - - printf("\nDualView types inside struct\n"); - framework_data_t framework_data; // data is allocated by some framework across CPUs. - - // use MATAR to get the data onto the device e.g., GPU and make multiD views of the data - framework_matar_t mtr_data; - - + + // update the host side + cell_data.den.update_host(); + cell_data.pres.update_host(); + printf("The host values of the dual CArrays in the struct = %f and %f \n", cell_data.den.host(0, 0, 0), cell_data.pres.host(0, 0, 0)); + + printf("\nDualView types inside struct\n"); + framework_data_t framework_data; // data is allocated by some framework across CPUs. + + // use MATAR to get the data onto the device e.g., GPU and make multiD views of the data + framework_matar_t mtr_data; + // get the mesh dims from the framework struct int mesh_dim1 = framework_data.dim1; int mesh_dim2 = framework_data.dim2; int mesh_dim3 = framework_data.dim3; - + printf("allocate data from the framework on the device\n"); - mtr_data.data1 = DViewCArrayKokkos (&framework_data.data1[0], + mtr_data.data1 = DViewCArrayKokkos(&framework_data.data1[0], mesh_dim1, mesh_dim2, mesh_dim3); - mtr_data.data2 = DViewCArrayKokkos (&framework_data.data2[0], + mtr_data.data2 = DViewCArrayKokkos(&framework_data.data2[0], mesh_dim1, mesh_dim2, mesh_dim3); - - + printf("setting the dual type values\n"); // set the framework values inside the struct on the device FOR_ALL(i, 0, mesh_dim1, j, 0, mesh_dim2, k, 0, mesh_dim3, - { - mtr_data.data1(i,j,k) = 5.6; - mtr_data.data2(i,j,k) = 9.2; - }); + { + mtr_data.data1(i, j, k) = 5.6; + mtr_data.data2(i, j, k) = 9.2; + }); Kokkos::fence(); - - // update the host side - mtr_data.data1.update_host(); - mtr_data.data2.update_host(); - printf("The 1st values of framework struct arrays = %f and %f \n", framework_data.data1[0], framework_data.data2[0]); - // note how MATAR modified the data in the framework on the device - - // The dualView type also gives a view of the 1D framework data on the host side - printf("The views of 1st host values of framework data = %f and %f \n", mtr_data.data1.host(0,0,0), mtr_data.data2.host(0,0,0)); - - framework_data.data1[0] = 77.77; - framework_data.data1[1] = 88.88; - mtr_data.data1.update_device(); - RUN({ - printf("value on device after update = %f, %f", mtr_data.data1(0,0,0), mtr_data.data1(0,0,1)); - }); - Kokkos::fence(); - - printf("\n"); - - - - + + // update the host side + mtr_data.data1.update_host(); + mtr_data.data2.update_host(); + printf("The 1st values of framework struct arrays = %f and %f \n", framework_data.data1[0], framework_data.data2[0]); + // note how MATAR modified the data in the framework on the device + + // The dualView type also gives a view of the 1D framework data on the host side + printf("The views of 1st host values of framework data = %f and %f \n", mtr_data.data1.host(0, 0, 0), mtr_data.data2.host(0, 0, 0)); + + framework_data.data1[0] = 77.77; + framework_data.data1[1] = 88.88; + mtr_data.data1.update_device(); + RUN({ + printf("value on device after update = %f, %f", mtr_data.data1(0, 0, 0), mtr_data.data1(0, 0, 1)); + }); + Kokkos::fence(); + + printf("\n"); + // ----------------------- // DynamicRaggedRightArray // ----------------------- - + printf("\nDynamic Ragged Right Array\n"); - DynamicRaggedRightArrayKokkos drrak; - drrak = DynamicRaggedRightArrayKokkos (size_i, size_j); - + DynamicRaggedRightArrayKokkos drrak; + drrak = DynamicRaggedRightArrayKokkos(size_i, size_j); + Kokkos::parallel_for("DRRAKTest", size_i, KOKKOS_LAMBDA(const int i) { for (int j = 0; j < (i % size_j) + 1; j++) { drrak.stride(i)++; - drrak(i,j) = j; - //printf("(%i) stride is %d\n", i, j); + drrak(i, j) = j; + // printf("(%i) stride is %d\n", i, j); } }); Kokkos::fence(); - + printf("\ntesting macro FOR_ALL\n"); - + // testing MATAR FOR_ALL loop - DynamicRaggedRightArrayKokkos my_dyn_ragged(size_i, size_j); + DynamicRaggedRightArrayKokkos my_dyn_ragged(size_i, size_j); FOR_ALL(i, 0, size_i, { for (int j = 0; j <= (i % size_j); j++) { my_dyn_ragged.stride(i)++; - my_dyn_ragged(i,j) = j; - printf(" dyn_ragged_right error = %i \n", my_dyn_ragged(i,j)-drrak(i,j)); - }// end for - });// end parallel for + my_dyn_ragged(i, j) = j; + printf(" dyn_ragged_right error = %i \n", my_dyn_ragged(i, j) - drrak(i, j)); + } // end for + }); // end parallel for Kokkos::fence(); - + // ----------------------- // RaggedRightArray // ----------------------- printf("\nRagged Right Array\n"); // testing ragged initialized with CArrayKokkos for strides - CArrayKokkos some_strides(4); - + CArrayKokkos some_strides(4); + // create a lower-triangular array RUN({ some_strides(0) = 1; @@ -792,177 +752,155 @@ int main(int argc, char *argv[]) { some_strides(2) = 3; some_strides(3) = 4; }); - - RaggedRightArrayKokkos lower_tri(some_strides); - - - - + + RaggedRightArrayKokkos lower_tri(some_strides); + // ----------------------- // CArray view // ----------------------- - + printf("\nView CArray\n"); std::array A1d; for (int init = 0; init < 9; init++) { - A1d[init] = init+1; + A1d[init] = init + 1; } - policy2D CAKPpol = policy2D({0,0}, {3, 3}); - DViewCArrayKokkos cakp; - cakp = DViewCArrayKokkos (&A1d[0], 3, 3); + policy2D CAKPpol = policy2D({ 0, 0 }, { 3, 3 }); + DViewCArrayKokkos cakp; + cakp = DViewCArrayKokkos(&A1d[0], 3, 3); Kokkos::parallel_for("CAKPTest", CAKPpol, KOKKOS_LAMBDA(const int i, const int j) { - //printf("%d) %d\n", i * 3 + j, cakp(i, j)); + // printf("%d) %d\n", i * 3 + j, cakp(i, j)); }); Kokkos::fence(); - + // ----------------------- // CArray inside a class // ----------------------- - + printf("\nCArray in a class\n"); Data my_data(size_i, size_j, size_k); my_data.some_fcn(); - - - - + printf("\nENUM\n"); - + // simple enum example: // choices::myChoice enumVar; // enumVar = choices::METHOD_A; // setting the method - + // declare methods MethodA my_method_a; MethodB my_method_b; - printf("CArrayKokkos of enums\n"); auto time_1 = std::chrono::high_resolution_clock::now(); - CArrayKokkos my_choices(2); - + CArrayKokkos my_choices(2); + // set the method on the GPU RUN({ my_choices(0) = choices::METHOD_A; my_choices(1) = choices::METHOD_B; }); Kokkos::fence(); - - - + // e.g., loop over in parallel - FOR_ALL(i,1,2,{ + FOR_ALL(i, 1, 2, { printf("selecting method\n"); - - switch (my_choices(i)) - { + + switch (my_choices(i)) { case choices::METHOD_A: - { - // do stuff - printf("using method_A\n"); - my_method_a(fmk4D); - break; - } - + { + // do stuff + printf("using method_A\n"); + my_method_a(fmk4D); + break; + } + case choices::METHOD_B: - { - // do stuff - printf("using method_B\n"); - my_method_b(fmk4D); - break; - } - + { + // do stuff + printf("using method_B\n"); + my_method_b(fmk4D); + break; + } + default: - { - // do nothing - } - }; // end switch - - + { + // do nothing + } + }; // end switch }); Kokkos::fence(); - + auto time_2 = std::chrono::high_resolution_clock::now(); - - + std::cout << "Elapsed time in seconds: " << std::chrono::duration_cast(time_2 - time_1).count() << " microsec" << std::endl; - - - + printf("\nCArray of function pointers\n"); - - //method_ptrs; - CArrayKokkos < method_ptrs> > Array_ptrs(2); - - + + // method_ptrs; + CArrayKokkos>> Array_ptrs(2); + // set the pointer on the device e.g., GPU RUN({ Array_ptrs(0).fcn_ptr = sum; Array_ptrs(1).fcn_ptr = multiply; }); Kokkos::fence(); - + // use the function RUN({ Array_ptrs(0).fcn_ptr(fmk4D); Array_ptrs(1).fcn_ptr(fmk4D); }); Kokkos::fence(); - - - CArrayHost a_carray_cpu(5); + + CArrayHost a_carray_cpu(5); a_carray_cpu(0) = 0; printf("\nalias name value [0] on host = %d \n", a_carray_cpu(0)); - - CArrayDevice a_carray_device(5); + + CArrayDevice a_carray_device(5); RUN({ a_carray_device(0) = 0; printf("\nalias name value [0] on device = %d \n", a_carray_device(0)); }); - - - // Hierarchical - - size_t hiersize = 4; - auto hierTest1D = CArrayKokkos (hiersize); - auto hierTest2D = CArrayKokkos (hiersize,hiersize); - auto hierTest3D = CArrayKokkos (hiersize,hiersize,hiersize); - FOR_ALL(i_i, 0, hiersize, j_j, 0, hiersize, k_k, 0, hiersize, { - hierTest3D(i_i, j_j, k_k) = 0.0; - }); - FOR_FIRST(hiersize,{ - //Kokkos::parallel_for( \ - //Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \ - //KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) { - const int i_i = TEAM_ID; - FOR_SECOND(j_j,i_i,hiersize,{ - //Kokkos::parallel_for( \ - //Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) { - //hierTest2D(i_i,j_j) = i_i * (j_j+1); - // int jstart = j_j*32; - // int jend = (j_j+1)*32; - FOR_THIRD(k_k, i_i, j_j, { - printf("%d,%d,%d\n", i_i,j_j,k_k); - //hierTest3D(i_i,j_j,k_k) = i_i*j_j*k_k; + + // Hierarchical + + size_t hiersize = 4; + auto hierTest1D = CArrayKokkos(hiersize); + auto hierTest2D = CArrayKokkos(hiersize, hiersize); + auto hierTest3D = CArrayKokkos(hiersize, hiersize, hiersize); + FOR_ALL(i_i, 0, hiersize, j_j, 0, hiersize, k_k, 0, hiersize, { + hierTest3D(i_i, j_j, k_k) = 0.0; + }); + FOR_FIRST(hiersize, { + // Kokkos::parallel_for( \ + //Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \ + //KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) { + const int i_i = TEAM_ID; + FOR_SECOND(j_j, i_i, hiersize, { + // Kokkos::parallel_for( \ + //Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) { + // hierTest2D(i_i,j_j) = i_i * (j_j+1); + // int jstart = j_j*32; + // int jend = (j_j+1)*32; + FOR_THIRD(k_k, i_i, j_j, { + printf("%d,%d,%d\n", i_i, j_j, k_k); + // hierTest3D(i_i,j_j,k_k) = i_i*j_j*k_k; + }); }); }); - }); - Kokkos::fence(); - printf("\n\n\nHierarchical\n"); - for (int ppp = 0; ppp < hiersize; ppp++) { - //printf("%f\n", hierTest3D(0,0,ppp)); - //printf("%f\n", hierTest2D(3,ppp)); - //printf("%f\n", hierTest3D(3,3,ppp)); - } - printf("\n\n"); - - - - + Kokkos::fence(); + printf("\n\n\nHierarchical\n"); + for (int ppp = 0; ppp < hiersize; ppp++) { + // printf("%f\n", hierTest3D(0,0,ppp)); + // printf("%f\n", hierTest2D(3,ppp)); + // printf("%f\n", hierTest3D(3,3,ppp)); + } + printf("\n\n"); } // end of kokkos scope - + Kokkos::finalize(); printf("\nfinished\n\n"); @@ -970,146 +908,121 @@ int main(int argc, char *argv[]) { return 0; } - - // ----- Functions called INSIDE a kokkos parallel loop ----- KOKKOS_INLINE_FUNCTION -void pass_by_ref(const FMatrixKokkos &matrix){ - printf("inside pass_by_ref function,"); +void pass_by_ref(const FMatrixKokkos& matrix) +{ + printf("inside pass_by_ref function,"); + int val = 0; + for (int k = 1; k <= 5; k++) { + for (int j = 1; j <= 4; j++) { + for (int i = 1; i <= 3; i++) { + val += matrix(i, j, k, 1); + } // end for i + } // end for j + } // end for k + + printf(" val = %i \n", val); +} - int val = 0; - for (int k=1; k<=5; k++){ - for (int j=1; j<=4; j++){ - for (int i=1; i<=3; i++){ - - val += matrix(i,j,k,1); - - } // end for i - } // end for j - } // end for k +KOKKOS_INLINE_FUNCTION +void pass_by_val(const FMatrixKokkos matrix) +{ + printf("inside pass_by_val function,"); + + int val = 0; - printf(" val = %i \n", val); + // do summation in serial + for (int k = 1; k <= 5; k++) { + for (int j = 1; j <= 4; j++) { + for (int i = 1; i <= 3; i++) { + val += matrix(i, j, k, 1); + } // end for i + } // end for j + } // end for k + printf(" val = %i, \n", val); } - KOKKOS_INLINE_FUNCTION -void pass_by_val(const FMatrixKokkos matrix){ - printf("inside pass_by_val function,"); - - int val = 0; - - // do summation in serial - for (int k=1; k<=5; k++){ - for (int j=1; j<=4; j++){ - for (int i=1; i<=3; i++){ - - val += matrix(i,j,k,1); - - } // end for i - } // end for j - } // end for k - - printf(" val = %i, \n", val); +void pass_view_by_ref(const ViewFMatrixKokkos& matrix) +{ + // remember that MATAR views are always on the device -} + printf("inside pass_view_by_ref function,"); + int val = 0; -KOKKOS_INLINE_FUNCTION -void pass_view_by_ref(const ViewFMatrixKokkos &matrix){ - - // remember that MATAR views are always on the device - - printf("inside pass_view_by_ref function,"); - - int val = 0; - - // do summation in serial - for (int k=1; k<=5; k++){ - for (int j=1; j<=4; j++){ - for (int i=1; i<=3; i++){ - - val += matrix(i,j,k); - - } // end for i - } // end for j - } // end for k - - printf(" val = %i, \n", val); + // do summation in serial + for (int k = 1; k <= 5; k++) { + for (int j = 1; j <= 4; j++) { + for (int i = 1; i <= 3; i++) { + val += matrix(i, j, k); + } // end for i + } // end for j + } // end for k + printf(" val = %i, \n", val); } // end function - -template +template KOKKOS_INLINE_FUNCTION -void run_methods(const F1 &lambda_fcn1, const F2 &lambda_fcn2, methods &v) { +void run_methods(const F1& lambda_fcn1, const F2& lambda_fcn2, methods& v) +{ switch (v.index()) { + case 0: { + lambda_fcn1(std::get(v)); + break; + } // end case 0 - case 0: { - lambda_fcn1(std::get(v)); - break; - } // end case 0 - - case 1: { - lambda_fcn2(std::get(v)); - break; - } // end case 1 - - } // end case + case 1: { + lambda_fcn2(std::get(v)); + break; + } // end case 1 + } // end case }; // end of function - - - - - // ----- Functions NOT called in a kokkos parallel loop ----- -void pass_by_ref_two(const FMatrixKokkos &matrix){ - printf("inside pass_by_ref_two function (parallel loops),"); +void pass_by_ref_two(const FMatrixKokkos& matrix) +{ + printf("inside pass_by_ref_two function (parallel loops),"); int loc_sum; int val = 0; - + // do summation in parallel on GPU REDUCE_SUM(k, 1, 6, j, 1, 5, i, 1, 4, loc_sum, { - loc_sum += matrix(i,j,k,1); - }, val); - - printf(" val = %i \n", val); + loc_sum += matrix(i, j, k, 1); + }, val); + printf(" val = %i \n", val); } +FMatrixKokkos return_by_val() +{ + printf("inside return_by_val \n"); -FMatrixKokkos return_by_val(){ + // ----------------------- + // parameters for examples + // ----------------------- + u_int size_i, size_j, size_k, size_l; + size_i = 3; size_j = 4; size_k = 5; size_l = 6; - printf("inside return_by_val \n"); + policy4D Mtx_policy_4d = policy4D({ 1, 1, 1, 1 }, { size_i + 1, size_j + 1, size_k + 1, size_l + 1 }); - // ----------------------- - // parameters for examples - // ----------------------- - u_int size_i, size_j, size_k, size_l; - size_i = 3; size_j = 4; size_k = 5; size_l = 6; - - policy4D Mtx_policy_4d = policy4D({1, 1, 1, 1}, {size_i+1, size_j+1, size_k+1, size_l+1}); + FMatrixKokkos fmk4D_local(size_i, size_j, size_k, size_l); - FMatrixKokkos fmk4D_local(size_i, size_j, size_k, size_l); - - Kokkos::parallel_for("4DFMatrix", Mtx_policy_4d, KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - int idx = (l-1) * size_i * size_j * size_k + (k-1) * size_i * size_j + (j-1) * size_i + (i-1); + Kokkos::parallel_for("4DFMatrix", Mtx_policy_4d, KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { + int idx = (l - 1) * size_i * size_j * size_k + (k - 1) * size_i * size_j + (j - 1) * size_i + (i - 1); fmk4D_local(i, j, k, l) = idx; - }); - Kokkos::fence(); - - return fmk4D_local; -} - - - - + }); + Kokkos::fence(); + return fmk4D_local; +} diff --git a/examples/matar_fortran/matar_functions.cpp b/examples/matar_fortran/matar_functions.cpp index 28e066e9..09c254e1 100644 --- a/examples/matar_fortran/matar_functions.cpp +++ b/examples/matar_fortran/matar_functions.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -5,8 +38,8 @@ using namespace mtr; // matar namespace /* - Functions or subroutines that will be called from fortran should include "_" at the end of name. - Example: Given a C++ definded subroutine as "subroutineName_", + Functions or subroutines that will be called from fortran should include "_" at the end of name. + Example: Given a C++ definded subroutine as "subroutineName_", it should be called in fortran as "call subroutineName(...)" Also the functions or subroutines should be decleared with extern "C" */ @@ -14,8 +47,8 @@ using namespace mtr; // matar namespace extern "C" void kokkos_initialize_(); extern "C" void kokkos_finalize_(); -extern "C" void square_array_elements_(double *array, int *nx, int *ny); -extern "C" void sum_array_elements_(double *array, int *nx, int *ny, double *sum_of_elements); +extern "C" void square_array_elements_(double* array, int* nx, int* ny); +extern "C" void sum_array_elements_(double* array, int* nx, int* ny, double* sum_of_elements); void kokkos_initialize_() { @@ -27,7 +60,7 @@ void kokkos_finalize_() Kokkos::finalize(); } -void square_array_elements_(double *array, int *nx, int *ny) +void square_array_elements_(double* array, int* nx, int* ny) { // define private copys of nx and ny // this enables kokkos to copy stack variables @@ -36,31 +69,31 @@ void square_array_elements_(double *array, int *nx, int *ny) int ny_ = *ny; // create DViewFMatrixKokkos since array is fortran allocated - auto array_2D_dual_view = DViewFMatrixKokkos (array, nx_, ny_); + auto array_2D_dual_view = DViewFMatrixKokkos(array, nx_, ny_); DO_ALL(j, 1, ny_, i, 1, nx_, { - array_2D_dual_view(i,j) = pow(array_2D_dual_view(i,j), 2); + array_2D_dual_view(i, j) = pow(array_2D_dual_view(i, j), 2); }); array_2D_dual_view.update_host(); } -void sum_array_elements_(double *array, int *nx, int *ny, double *sum_of_elements) +void sum_array_elements_(double* array, int* nx, int* ny, double* sum_of_elements) { // define private copys of nx and ny int nx_ = *nx; int ny_ = *ny; // create DViewFMatrixKokkos since array is fortran allocated - auto array_2D_dual_view = DViewFMatrixKokkos (array, nx_, ny_); + auto array_2D_dual_view = DViewFMatrixKokkos(array, nx_, ny_); double global_sum; double local_sum; DO_REDUCE_SUM(j, 1, ny_, - i, 1, nx_, + i, 1, nx_, local_sum, { - local_sum += array_2D_dual_view(i,j); + local_sum += array_2D_dual_view(i, j); }, global_sum); // update sum_of_elements memory location diff --git a/examples/mtr-kokkos-simple.cpp b/examples/mtr-kokkos-simple.cpp index ee15736e..9db4dd38 100644 --- a/examples/mtr-kokkos-simple.cpp +++ b/examples/mtr-kokkos-simple.cpp @@ -42,7 +42,7 @@ // // Data type naming convetions: // -// Allocation memory or view existin memory: +// Allocation memory or view existing memory: // View = map data to multi-D or slice out data, no memory allocation on CPU // Blank = allocate memory on the CPU, GPU, or Both depending on specified location // @@ -89,6 +89,31 @@ using namespace mtr; // matar namespace +// Function on device to calculate y = m*x + b +// Parallelism is done outside this function +KOKKOS_INLINE_FUNCTION // or KOKKOKS_FUNCTION +double calc_line_value(double m, double x, double b){ + return m*x + b; +} // end function calc_y + + +// A function to calculate all y values; the parallelism is done inside this function +void calc_line(CArrayDevice &y, double m, double b, double x_start, double x_final){ + + int N = y.dims(0); // use meta data on y to find N + + double deltaX = (x_final - x_start)/((double)N - 1); + + FOR_ALL(i,0,N,{ + + double x = x_start + deltaX*(double)i; + + y(i) = calc_line_value(m,x,b); // calls the function to calculate a single y value + + }); // end parallel loop + +} // end function calc_all_y + // main int main(int argc, char *argv[]) { @@ -148,13 +173,12 @@ int main(int argc, char *argv[]) { }, result); - printf("3D reduce MAX %i\n", result); // =============== - - int N=200; // array dimensions are NxN + + int N=20; // array dimensions are NxN // A 2D array example following the C index convention // indicies go from 0 to less than N, last index varies the fastest @@ -167,6 +191,7 @@ int main(int argc, char *argv[]) { CArrayDevice U(N,N); // upper triangular array CArrayDevice x(N); CArrayDevice y(N); + auto time_1 = std::chrono::high_resolution_clock::now(); @@ -178,7 +203,7 @@ int main(int argc, char *argv[]) { A(i,j) = 1.0; B(i,j) = 2.0; - }); + }); // end parallel for FOR_ALL (i, 0, N, j, 0, N,{ @@ -192,11 +217,11 @@ int main(int argc, char *argv[]) { U(i,j) = 4.0; } // end if - }); + }); // end parallel for FOR_ALL (i, 0, N, { y(i) = 4.0; - }); + }); // end parallel for // Add two arrays together @@ -206,7 +231,7 @@ int main(int argc, char *argv[]) { C(i,j) = A(i,j) + B(i,j); - }); + }); // end parallel for // Multiply two arrays together // D = A*B @@ -216,7 +241,7 @@ int main(int argc, char *argv[]) { D(i,j) = A(i,k)*B(k,j); - }); + }); // end parallel for // backwards substitution for (int k = N-1; k>=0; k--){ @@ -248,17 +273,158 @@ int main(int argc, char *argv[]) { loc_sum, { loc_sum += L(i,j)*x(j); }, result); - } + } // end if x(i) = (y(i)- result)/U(i,i); } // end for i + + // calculate y=mx+b over x=[0:25] with N subdivisions + double x_start = 0.0; + double x_final = 25.0; + double m = 1.0; + double b = 5.0; + + double deltaX = (x_final- x_start)/((double)N - 1.0); + FOR_ALL (i, 0, N, { + + double x = x_start + ((double)i)*deltaX; + + // calling a function inside a parallel loop requires the KOKKOS_FUNCTION + y(i) = calc_line_value(m, x, b); + + }); // end parallel for + + // a single function to calculate all y values + calc_line(y, m, b, x_start, x_final); + + + + // ----- Heat transfer ----- + // y + // ^ + // | + // T=cold + // ----------T=Hot + // | | + // T=cold | | + // | | + // (0,0)---------- -> x + // + + int length = 20; + + // Parallel Jacobi solver for steady 2D heat transfer + CArrayDevice Temp(length+2, length+2); + CArrayDevice Temp_previous(length+2, length+2); + + // heat source is bottom right corner of mesh, T=100 in that corner + // temperature of left wall is T_cold=0. + // temperature of top wall is T_cold=0. + const double Temp_cold = 0.0; + const double Temp_hot = 100.0; + + + // initialize the inner mesh region + DO_ALL(i, 0, length+1, + j, 0, length+1, { + Temp_previous(i,j) = 0.0; + Temp(i,j) = Temp_previous(i,j); + }); // end parallel do + + // boundaries are at i=0 and i=length+1 + // boundaries are at j=0 and j=length+1 + + + // apply wall temperature boundary conditions to right and left walls + DO_ALL(k,0,length+1, { + + // k is an arbitrary index for i or j directions + + // left wall is cold + Temp_previous(0,k) = Temp_cold; + Temp(0,k) = Temp_previous(0,k); + + + // right wall is linearly decreasing in temperature in the positive vertical direction + Temp_previous(length+1,k) = ((double)k )*(Temp_hot - Temp_cold) /( (double)length + 1); + Temp(length+1,k) = Temp_previous(length+1,k); + + // top wall is linearly increasing in temperature in the positive horizontal direction + Temp_previous(k,length+1) = ((double)k )*(Temp_hot - Temp_cold) /( (double)length + 1); + Temp(k,0) = Temp_previous(k,0); + + // bottom wall is cold + Temp_previous(k,0) = Temp_cold; + Temp(k,length+1) = Temp_previous(k,length+1); + + }); // end parallel do + + + // solve for steady-state temperature + double tolerance = 0.01; + int max_iters = 10000; + for(int iter=0; iter<=max_iters; iter++){ + + // find next temperature + DO_ALL(i, 1, length, + j, 1, length, { + + Temp(i,j) = 0.25*( Temp_previous(i+1,j) + Temp_previous(i-1,j) + Temp_previous(i,j+1) + Temp_previous(i,j-1) ); + + }); // end parallel for + + double max_delta_temp; + double loc_max_delta_temp; + DO_REDUCE_MAX(i, 1, length, + j, 1, length, + loc_max_delta_temp, { + + // find the temperature difference between iterations + double dT = fabs( Temp(i,j)-Temp_previous(i,j) ); + + // get the largest dT + if ( loc_max_delta_temp < dT ){ + loc_max_delta_temp = dT; + } // end if + + }, max_delta_temp); + + // print the progress every 100 + if(iter%100 == 0){ + printf("interation = %d, max error = %f \n", iter, max_delta_temp); + } + + // stop when we reach the specified tolerance + if(max_delta_temp ms = time_2 - time_1; - std::cout << ms.count() << "ms\n"; - + std::cout << "runtime of all tests = " << ms.count() << "ms\n"; + + printf("\n"); + printf("Temperature profile\n"); + // print temperature result + for(int i=length+1; i>=0; i--){ + for (int j=0; j<=length+1; j++){ + printf(" %5.2f ", Temp(i,j)); + } // for j + printf("\n"); + }; // for i } // end of kokkos scope diff --git a/examples/parallel_hello_world.cpp b/examples/parallel_hello_world.cpp index c8d18448..71386e2d 100644 --- a/examples/parallel_hello_world.cpp +++ b/examples/parallel_hello_world.cpp @@ -1,16 +1,48 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "matar.h" using namespace mtr; // matar namespace -int main() { - +int main() +{ int thread_id; - + #pragma omp parallel { thread_id = omp_get_thread_num(); printf("Hello World... from thread = %d\n", thread_id); } - } diff --git a/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.cpp b/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.cpp index bc937341..8546e5fd 100644 --- a/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.cpp +++ b/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "CH_fourier_spectral_solver.h" #include "fourier_space.h" @@ -7,7 +40,7 @@ #include "fft_manager_out_of_place.h" #endif -CHFourierSpectralSolver::CHFourierSpectralSolver(SimParameters &sp) +CHFourierSpectralSolver::CHFourierSpectralSolver(SimParameters& sp) { // set simulation parameters nn_ = sp.nn; @@ -29,25 +62,23 @@ CHFourierSpectralSolver::CHFourierSpectralSolver(SimParameters &sp) #ifdef IN_PLACE_FFT nn_img_[2] = nz_; #elif OUT_OF_PLACE_FFT - nz21_ = nz_/2 + 1; + nz21_ = nz_ / 2 + 1; nn_img_[2] = nz21_; #endif // initialize arrays needed for simulation - comp_img_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2], 2); - dfdc_img_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2], 2); - kpow2_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2]); - denominator_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2]); + comp_img_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2], 2); + dfdc_img_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2], 2); + kpow2_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2]); + denominator_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2]); // set values of kpow2_ set_kpow2_(); // set values of denominator_ set_denominator_(); - } - void CHFourierSpectralSolver::set_kpow2_() { // get fourier space @@ -55,33 +86,28 @@ void CHFourierSpectralSolver::set_kpow2_() auto kx = fs.get_kx(); auto ky = fs.get_ky(); auto kz = fs.get_kz(); - + // calculate kpow2_ Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nn_img_[0], nn_img_[1], nn_img_[2]}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - kpow2_(i,j,k) = kx(i) * kx(i) - + ky(j) * ky(j) - + kz(k) * kz(k); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nn_img_[0], nn_img_[1], nn_img_[2] }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + kpow2_(i, j, k) = kx(i) * kx(i) + + ky(j) * ky(j) + + kz(k) * kz(k); }); - } - void CHFourierSpectralSolver::set_denominator_() { // calculate denominator_ Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nn_img_[0], nn_img_[1], nn_img_[2]}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - denominator_(i,j,k) = 1.0 + (dt_ * M_ * kappa_ * kpow2_(i,j,k) * kpow2_(i,j,k)); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nn_img_[0], nn_img_[1], nn_img_[2] }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + denominator_(i, j, k) = 1.0 + (dt_ * M_ * kappa_ * kpow2_(i, j, k) * kpow2_(i, j, k)); }); - - } - -void CHFourierSpectralSolver::time_march(DCArrayKokkos &comp, CArrayKokkos &dfdc) +void CHFourierSpectralSolver::time_march(DCArrayKokkos& comp, CArrayKokkos& dfdc) { // initialize fft manager #ifdef IN_PLACE_FFT @@ -89,7 +115,7 @@ void CHFourierSpectralSolver::time_march(DCArrayKokkos &comp, CArrayKokk #elif OUT_OF_PLACE_FFT static FFTManagerOutOfPlace fft_manager = FFTManagerOutOfPlace(nn_); #endif - + // get foward fft of comp fft_manager.perform_forward_fft(comp.device_pointer(), comp_img_.pointer()); @@ -98,26 +124,22 @@ void CHFourierSpectralSolver::time_march(DCArrayKokkos &comp, CArrayKokk // solve Cahn Hilliard equation in fourier space Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nn_img_[0], nn_img_[1], nn_img_[2]}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - comp_img_(i,j,k,0) = (comp_img_(i,j,k,0) - (dt_ * M_ * kpow2_(i,j,k)) * dfdc_img_(i,j,k,0)) - / (denominator_(i,j,k)); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nn_img_[0], nn_img_[1], nn_img_[2] }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + comp_img_(i, j, k, 0) = (comp_img_(i, j, k, 0) - (dt_ * M_ * kpow2_(i, j, k)) * dfdc_img_(i, j, k, 0)) + / (denominator_(i, j, k)); - comp_img_(i,j,k,1) = (comp_img_(i,j,k,1) - (dt_ * M_ * kpow2_(i,j,k)) * dfdc_img_(i,j,k,1)) - / (denominator_(i,j,k)); + comp_img_(i, j, k, 1) = (comp_img_(i, j, k, 1) - (dt_ * M_ * kpow2_(i, j, k)) * dfdc_img_(i, j, k, 1)) + / (denominator_(i, j, k)); }); - - // get backward fft of comp_img fft_manager.perform_backward_fft(comp_img_.pointer(), comp.device_pointer()); // normalize after inverse fft Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nx_, ny_, nz_}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - comp(i,j,k) = comp(i,j,k) / double(nx_ * ny_ * nz_); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx_, ny_, nz_ }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + comp(i, j, k) = comp(i, j, k) / double(nx_ * ny_ * nz_); }); - } - diff --git a/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.h b/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.h index 673cb561..3394da9d 100644 --- a/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.h +++ b/examples/phaseField/srcKokkosVerbose/CH_fourier_spectral_solver.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "sim_parameters.h" @@ -7,34 +40,34 @@ using namespace mtr; // matar namespace class CHFourierSpectralSolver { - private: - // simulation parameters - int* nn_; - int nn_img_[3]; - int nx_; - int ny_; - int nz_; +private: + // simulation parameters + int* nn_; + int nn_img_[3]; + int nx_; + int ny_; + int nz_; #ifdef OUT_OF_PLACE_FFT - int nz21_; + int nz21_; #endif - int ndim_; - double* delta_; - double dx_; - double dy_; - double dz_; - double dt_; - double M_; - double kappa_; - - // arrays needed by solver - CArrayKokkos comp_img_; - CArrayKokkos dfdc_img_; - CArrayKokkos kpow2_; - CArrayKokkos denominator_; - - public: - CHFourierSpectralSolver(SimParameters &sp); - void set_kpow2_(); - void set_denominator_(); - void time_march(DCArrayKokkos &comp, CArrayKokkos &dfdc); + int ndim_; + double* delta_; + double dx_; + double dy_; + double dz_; + double dt_; + double M_; + double kappa_; + + // arrays needed by solver + CArrayKokkos comp_img_; + CArrayKokkos dfdc_img_; + CArrayKokkos kpow2_; + CArrayKokkos denominator_; + +public: + CHFourierSpectralSolver(SimParameters& sp); + void set_kpow2_(); + void set_denominator_(); + void time_march(DCArrayKokkos& comp, CArrayKokkos& dfdc); }; diff --git a/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.cpp b/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.cpp index 48e12e38..b50f8cae 100644 --- a/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.cpp +++ b/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.cpp @@ -1,110 +1,134 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT # include "fft_manager_in_place.h" - -FFTManagerInPlace::FFTManagerInPlace(int * nn) +FFTManagerInPlace::FFTManagerInPlace(int* nn) { - nn_ = nn; - nx_ = nn_[0]; - ny_ = nn_[1]; - nz_ = nn_[2]; + nn_ = nn; + nx_ = nn_[0]; + ny_ = nn_[1]; + nz_ = nn_[2]; data_ = CArrayKokkos(nx_, ny_, nz_, 2); // calculate ndim ndim_ = 0; for (int i = 0; i < 3; i++) { - if (nn_[i] > 1) ++ndim_; + if (nn_[i] > 1) { + ++ndim_; + } } // initialize fft - #ifdef HAVE_CUDA - fftc_cufft_init_in_place_(); + #ifdef HAVE_CUDA + fftc_cufft_init_in_place_(); #else - fftc_fftw_init_in_place_(); + fftc_fftw_init_in_place_(); #endif } - -void FFTManagerInPlace::prep_for_forward_fft_(double *input) +void FFTManagerInPlace::prep_for_forward_fft_(double* input) { // this function writes the data in "input" array to "data_" array // in order to ready "data_" for in-place forward fft. // create view of input - auto input_view = ViewCArrayKokkos (input, nx_, ny_, nz_); + auto input_view = ViewCArrayKokkos(input, nx_, ny_, nz_); // write input to data for in-place forward fft Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nx_, ny_, nz_}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - data_(i,j,k,0) = input_view(i,j,k); - data_(i,j,k,1) = 0.0; + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx_, ny_, nz_ }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + data_(i, j, k, 0) = input_view(i, j, k); + data_(i, j, k, 1) = 0.0; }); - } - -void FFTManagerInPlace::get_forward_fft_result_(double *output) +void FFTManagerInPlace::get_forward_fft_result_(double* output) { // this function writes the result of in-place forward fft // in "data_" array into "output" array. // create view of output - auto output_view = ViewCArrayKokkos (output, nx_, ny_, nz_, 2); + auto output_view = ViewCArrayKokkos(output, nx_, ny_, nz_, 2); // write data to output after in-place fft Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nx_, ny_, nz_}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - output_view(i,j,k,0) = data_(i,j,k,0); - output_view(i,j,k,1) = data_(i,j,k,1); - + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx_, ny_, nz_ }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + output_view(i, j, k, 0) = data_(i, j, k, 0); + output_view(i, j, k, 1) = data_(i, j, k, 1); }); - } - -void FFTManagerInPlace::prep_for_backward_fft_(double *input) +void FFTManagerInPlace::prep_for_backward_fft_(double* input) { // this function writes the data in "input" array to "data_" array // in order to ready "data_" for in-place backward fft. // create view of input - auto input_view = ViewCArrayKokkos (input, nx_, ny_, nz_, 2); + auto input_view = ViewCArrayKokkos(input, nx_, ny_, nz_, 2); // write input to data for in-place fft Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nx_, ny_, nz_}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - data_(i,j,k,0) = input_view(i,j,k,0); - data_(i,j,k,1) = input_view(i,j,k,1); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx_, ny_, nz_ }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + data_(i, j, k, 0) = input_view(i, j, k, 0); + data_(i, j, k, 1) = input_view(i, j, k, 1); }); - } - -void FFTManagerInPlace::get_backward_fft_result_(double *output) +void FFTManagerInPlace::get_backward_fft_result_(double* output) { // this function writes the result of in-place backward fft // in "data_" array into "output" array. // create view of output - auto output_view = ViewCArrayKokkos (output, nx_, ny_, nz_); + auto output_view = ViewCArrayKokkos(output, nx_, ny_, nz_); // write data to output after in-place fft Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nx_, ny_, nz_}), - KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k){ - output_view(i,j,k) = data_(i,j,k,0); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx_, ny_, nz_ }), + KOKKOS_CLASS_LAMBDA(const int i, const int j, const int k) { + output_view(i, j, k) = data_(i, j, k, 0); }); - } - -void FFTManagerInPlace::perform_forward_fft(double *input, double *output) +void FFTManagerInPlace::perform_forward_fft(double* input, double* output) { - // this function performs forward fft on "input" array and + // this function performs forward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the forward in-place fft // either using OPENMP or CUDA. @@ -115,19 +139,18 @@ void FFTManagerInPlace::perform_forward_fft(double *input, double *output) // perform foward fft isign_ = -1; #ifdef HAVE_CUDA - fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #else - fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #endif // get result after performing foward fft get_forward_fft_result_(output); } - -void FFTManagerInPlace::perform_backward_fft(double *input, double *output) +void FFTManagerInPlace::perform_backward_fft(double* input, double* output) { - // this function performs backward fft on "input" array and + // this function performs backward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the backward in-place fft // either using OPENMP or CUDA. @@ -138,9 +161,9 @@ void FFTManagerInPlace::perform_backward_fft(double *input, double *output) // perform backward fft isign_ = 1; #ifdef HAVE_CUDA - fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #else - fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #endif // get result after performing backward fft diff --git a/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.h b/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.h index 5c0d6e63..30192562 100644 --- a/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.h +++ b/examples/phaseField/srcKokkosVerbose/fft_manager_in_place.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT #pragma once @@ -8,35 +41,32 @@ using namespace mtr; // matar namespace class FFTManagerInPlace { - private: - int* nn_; - int nx_; - int ny_; - int nz_; - int ndim_; - int isign_; - CArrayKokkos data_; - - public: - FFTManagerInPlace(int * nn); - void perform_forward_fft(double *input, double *output); - void perform_backward_fft(double *input, double *output); - - void prep_for_forward_fft_(double *input); - void get_forward_fft_result_(double *output); - void prep_for_backward_fft_(double *input); - void get_backward_fft_result_(double *output); +private: + int* nn_; + int nx_; + int ny_; + int nz_; + int ndim_; + int isign_; + CArrayKokkos data_; -}; +public: + FFTManagerInPlace(int* nn); + void perform_forward_fft(double* input, double* output); + void perform_backward_fft(double* input, double* output); + void prep_for_forward_fft_(double* input); + void get_forward_fft_result_(double* output); + void prep_for_backward_fft_(double* input); + void get_backward_fft_result_(double* output); +}; #ifdef HAVE_CUDA void fftc_cufft_init_in_place_(); -void fftc_cufft_in_place_(double data[], int nn[], int *ndim, int *isign); +void fftc_cufft_in_place_(double data[], int nn[], int* ndim, int* isign); #else void fftc_fftw_init_in_place_(); -void fftc_fftw_in_place_(double data[], int nn[], int *ndim, int *isign); +void fftc_fftw_in_place_(double data[], int nn[], int* ndim, int* isign); #endif - #endif diff --git a/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.cpp b/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.cpp index 438ca832..54438e14 100644 --- a/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.cpp +++ b/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.cpp @@ -1,29 +1,59 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #include "fft_manager_out_of_place.h" -FFTManagerOutOfPlace::FFTManagerOutOfPlace(int * nn) +FFTManagerOutOfPlace::FFTManagerOutOfPlace(int* nn) { - nn_ = nn; - nx_ = nn_[0]; - ny_ = nn_[1]; - nz_ = nn_[2]; - nz21_ = nz_/2 + 1; + nn_ = nn; + nx_ = nn_[0]; + ny_ = nn_[1]; + nz_ = nn_[2]; + nz21_ = nz_ / 2 + 1; // initialize fft - #ifdef HAVE_CUDA - fftc_cufft_init_out_of_place_(); + #ifdef HAVE_CUDA + fftc_cufft_init_out_of_place_(); #else - fftc_fftw_init_out_of_place_(); + fftc_fftw_init_out_of_place_(); #endif } - - - -void FFTManagerOutOfPlace::perform_forward_fft(double *input, double *output) +void FFTManagerOutOfPlace::perform_forward_fft(double* input, double* output) { - // this function performs forward fft on "input" array and + // this function performs forward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the forward out-of-place fft // either using OPENMP or CUDA. @@ -31,16 +61,15 @@ void FFTManagerOutOfPlace::perform_forward_fft(double *input, double *output) // perform foward fft isign_ = -1; #ifdef HAVE_CUDA - fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); #else - fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); #endif - } -void FFTManagerOutOfPlace::perform_backward_fft(double *input, double *output) +void FFTManagerOutOfPlace::perform_backward_fft(double* input, double* output) { - // this function performs backward fft on "input" array and + // this function performs backward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the backward out-of-place fft // either using OPENMP or CUDA. @@ -48,12 +77,10 @@ void FFTManagerOutOfPlace::perform_backward_fft(double *input, double *output) // perform backward fft isign_ = 1; #ifdef HAVE_CUDA - fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); #else - fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); #endif } - - #endif diff --git a/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.h b/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.h index 1ef3f03d..31bd3fed 100644 --- a/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.h +++ b/examples/phaseField/srcKokkosVerbose/fft_manager_out_of_place.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #pragma once @@ -8,31 +41,27 @@ using namespace mtr; // matar namespace class FFTManagerOutOfPlace { - private: - int* nn_; - int nx_; - int ny_; - int nz_; - int nz21_; - int ndim_; - int isign_; - - public: - FFTManagerOutOfPlace(int * nn); - void perform_forward_fft(double *input, double *output); - void perform_backward_fft(double *input, double *output); +private: + int* nn_; + int nx_; + int ny_; + int nz_; + int nz21_; + int ndim_; + int isign_; +public: + FFTManagerOutOfPlace(int* nn); + void perform_forward_fft(double* input, double* output); + void perform_backward_fft(double* input, double* output); }; - - #ifdef HAVE_CUDA void fftc_cufft_init_out_of_place_(); -void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign); +void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign); #else void fftc_fftw_init_out_of_place_(); -void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign); +void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign); #endif - #endif diff --git a/examples/phaseField/srcKokkosVerbose/fftc_cufft_in_place.cpp b/examples/phaseField/srcKokkosVerbose/fftc_cufft_in_place.cpp index 2a4aeb26..1d597025 100644 --- a/examples/phaseField/srcKokkosVerbose/fftc_cufft_in_place.cpp +++ b/examples/phaseField/srcKokkosVerbose/fftc_cufft_in_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT #ifdef HAVE_CUDA @@ -7,77 +40,75 @@ #include #include -#include +#include // ---------------------------------------------------------------------- // CUFFT -static void -fft_cufft_forward(double *data, int nn[3]) +static void fft_cufft_forward(double* data, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; - static cufftHandle planZ2Z; + static cufftHandle planZ2Z; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplex; - if (!planZ2Z) { - cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); - } + typedef cuDoubleComplex cufftDoubleComplex; + if (!planZ2Z) { + cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); + } -//#pragma acc data copy(data[0:batch*stride]) - { -// printf("data1 %p\n", data); -//#pragma acc host_data use_device(data) +// #pragma acc data copy(data[0:batch*stride]) { +// printf("data1 %p\n", data); +// #pragma acc host_data use_device(data) + { // printf("data2 %p\n", data); - rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex *) data, - (cufftDoubleComplex *) data, - CUFFT_FORWARD); - assert(rc == CUFFT_SUCCESS); + rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex*) data, + (cufftDoubleComplex*) data, + CUFFT_FORWARD); + assert(rc == CUFFT_SUCCESS); + } } - } } -static void -fft_cufft_backward(double *data, int nn[3]) +static void fft_cufft_backward(double* data, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; - - static cufftHandle planZ2Z; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; + + static cufftHandle planZ2Z; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplex; - if (!planZ2Z) { - cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); - } + typedef cuDoubleComplex cufftDoubleComplex; + if (!planZ2Z) { + cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); + } -//#pragma acc data copy(data[0:batch*stride]) - { -//#pragma acc host_data use_device(data) - rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex *) data, - (cufftDoubleComplex *) data, - CUFFT_INVERSE); - assert(rc == CUFFT_SUCCESS); - } +// #pragma acc data copy(data[0:batch*stride]) + { +// #pragma acc host_data use_device(data) + rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex*) data, + (cufftDoubleComplex*) data, + CUFFT_INVERSE); + assert(rc == CUFFT_SUCCESS); + } } // ---------------------------------------------------------------------- -void fftc_cufft_in_place_(double data[], int nn[], int *ndim, int *isign) +void fftc_cufft_in_place_(double data[], int nn[], int* ndim, int* isign) { - //assert(*ndim == 3); - if (*isign == -1) { - fft_cufft_forward(data, nn); - } else { - fft_cufft_backward(data, nn); - } + // assert(*ndim == 3); + if (*isign == -1) { + fft_cufft_forward(data, nn); + } + else { + fft_cufft_backward(data, nn); + } } void fftc_cufft_init_in_place_(void) { } - #endif #endif diff --git a/examples/phaseField/srcKokkosVerbose/fftc_cufft_out_of_place.cpp b/examples/phaseField/srcKokkosVerbose/fftc_cufft_out_of_place.cpp index 06fcfeb5..0bef1662 100644 --- a/examples/phaseField/srcKokkosVerbose/fftc_cufft_out_of_place.cpp +++ b/examples/phaseField/srcKokkosVerbose/fftc_cufft_out_of_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #ifdef HAVE_CUDA @@ -7,77 +40,75 @@ #include #include -#include +#include // ---------------------------------------------------------------------- // CUFFT -static void -fft_cufft_forward(double *input, double *output, int nn[3]) +static void fft_cufft_forward(double* input, double* output, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; - static cufftHandle planD2Z; + static cufftHandle planD2Z; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplexi; - typedef double cufftDoubleReal; - if (!planD2Z) { - cufftPlan3d(&planD2Z, nn[0], nn[1], nn[2], CUFFT_D2Z); - } + typedef cuDoubleComplex cufftDoubleComplexi; + typedef double cufftDoubleReal; + if (!planD2Z) { + cufftPlan3d(&planD2Z, nn[0], nn[1], nn[2], CUFFT_D2Z); + } -//#pragma acc data copy(data[0:batch*stride]) - { -// printf("data1 %p\n", data); -//#pragma acc host_data use_device(data) +// #pragma acc data copy(data[0:batch*stride]) { +// printf("data1 %p\n", data); +// #pragma acc host_data use_device(data) + { // printf("data2 %p\n", data); - rc = cufftExecD2Z(planD2Z, (cufftDoubleReal *) input, - (cufftDoubleComplex *) output); - assert(rc == CUFFT_SUCCESS); + rc = cufftExecD2Z(planD2Z, (cufftDoubleReal*) input, + (cufftDoubleComplex*) output); + assert(rc == CUFFT_SUCCESS); + } } - } } -static void -fft_cufft_backward(double *input, double *output, int nn[3]) +static void fft_cufft_backward(double* input, double* output, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; - - static cufftHandle planZ2D; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; + + static cufftHandle planZ2D; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplex; - typedef double cufftDoubleReal; - if (!planZ2D) { - cufftPlan3d(&planZ2D, nn[0], nn[1], nn[2], CUFFT_Z2D); - } + typedef cuDoubleComplex cufftDoubleComplex; + typedef double cufftDoubleReal; + if (!planZ2D) { + cufftPlan3d(&planZ2D, nn[0], nn[1], nn[2], CUFFT_Z2D); + } -//#pragma acc data copy(data[0:batch*stride]) - { -//#pragma acc host_data use_device(data) - rc = cufftExecZ2D(planZ2D, (cufftDoubleComplex *) input, - (cufftDoubleReal *) output); - assert(rc == CUFFT_SUCCESS); - } +// #pragma acc data copy(data[0:batch*stride]) + { +// #pragma acc host_data use_device(data) + rc = cufftExecZ2D(planZ2D, (cufftDoubleComplex*) input, + (cufftDoubleReal*) output); + assert(rc == CUFFT_SUCCESS); + } } // ---------------------------------------------------------------------- -void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign) +void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign) { - //assert(*ndim == 3); - if (*isign == -1) { - fft_cufft_forward(input, output, nn); - } else { - fft_cufft_backward(input, output, nn); - } + // assert(*ndim == 3); + if (*isign == -1) { + fft_cufft_forward(input, output, nn); + } + else { + fft_cufft_backward(input, output, nn); + } } void fftc_cufft_init_out_of_place_(void) { } - #endif #endif diff --git a/examples/phaseField/srcKokkosVerbose/fftc_fftw_in_place.cpp b/examples/phaseField/srcKokkosVerbose/fftc_fftw_in_place.cpp index 50409ef8..92065f19 100644 --- a/examples/phaseField/srcKokkosVerbose/fftc_fftw_in_place.cpp +++ b/examples/phaseField/srcKokkosVerbose/fftc_fftw_in_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT #ifdef HAVE_OPENMP @@ -9,55 +42,53 @@ #include #include -//#ifdef FFTW_OMP +// #ifdef FFTW_OMP #include -//#endif +// #endif // ---------------------------------------------------------------------- // FFTW -static void -fft_forward_fftw(double *data, int nn[3]) +static void fft_forward_fftw(double* data, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], - (fftw_complex *) data, (fftw_complex *) data, - FFTW_FORWARD, FFTW_ESTIMATE); + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], + (fftw_complex*) data, (fftw_complex*) data, + FFTW_FORWARD, FFTW_ESTIMATE); + } - } - - fftw_execute_dft(plan, (fftw_complex *) data, (fftw_complex *) data); + fftw_execute_dft(plan, (fftw_complex*) data, (fftw_complex*) data); } -static void -fft_backward_fftw(double *data, int nn[3]) +static void fft_backward_fftw(double* data, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], - (fftw_complex *) data, (fftw_complex *) data, - FFTW_BACKWARD, FFTW_ESTIMATE); - } + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], + (fftw_complex*) data, (fftw_complex*) data, + FFTW_BACKWARD, FFTW_ESTIMATE); + } - fftw_execute_dft(plan, (fftw_complex *) data, (fftw_complex *) data); + fftw_execute_dft(plan, (fftw_complex*) data, (fftw_complex*) data); } -void fftc_fftw_in_place_(double data[], int nn[], int *ndim, int *isign) +void fftc_fftw_in_place_(double data[], int nn[], int* ndim, int* isign) { - if (*isign == -1) { - fft_forward_fftw(data, nn); - } else { - fft_backward_fftw(data, nn); - } + if (*isign == -1) { + fft_forward_fftw(data, nn); + } + else { + fft_backward_fftw(data, nn); + } } void fftc_fftw_init_in_place_(void) { -//#ifdef FFTW_OMP - fftw_init_threads(); - fftw_plan_with_nthreads(omp_get_max_threads()); -//#endif +// #ifdef FFTW_OMP + fftw_init_threads(); + fftw_plan_with_nthreads(omp_get_max_threads()); +// #endif } #endif diff --git a/examples/phaseField/srcKokkosVerbose/fftc_fftw_out_of_place.cpp b/examples/phaseField/srcKokkosVerbose/fftc_fftw_out_of_place.cpp index d37cedab..6efb4f5a 100644 --- a/examples/phaseField/srcKokkosVerbose/fftc_fftw_out_of_place.cpp +++ b/examples/phaseField/srcKokkosVerbose/fftc_fftw_out_of_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #ifdef HAVE_OPENMP @@ -9,55 +42,53 @@ #include #include -//#ifdef FFTW_OMP +// #ifdef FFTW_OMP #include -//#endif +// #endif // ---------------------------------------------------------------------- // FFTW -static void -fft_forward_fftw(double *input, double *output, int nn[3]) +static void fft_forward_fftw(double* input, double* output, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_r2c_3d(nn[0], nn[1], nn[2], - (double *) input, (fftw_complex *) output, + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_r2c_3d(nn[0], nn[1], nn[2], + (double*) input, (fftw_complex*) output, FFTW_ESTIMATE); + } - } - - fftw_execute_dft_r2c(plan, (double *) input, (fftw_complex *) output); + fftw_execute_dft_r2c(plan, (double*) input, (fftw_complex*) output); } -static void -fft_backward_fftw(double *input, double *output, int nn[3]) +static void fft_backward_fftw(double* input, double* output, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_c2r_3d(nn[0], nn[1], nn[2], - (fftw_complex *) input, (double *) output, - FFTW_ESTIMATE); - } + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_c2r_3d(nn[0], nn[1], nn[2], + (fftw_complex*) input, (double*) output, + FFTW_ESTIMATE); + } - fftw_execute_dft_c2r(plan, (fftw_complex *) input, (double *) output); + fftw_execute_dft_c2r(plan, (fftw_complex*) input, (double*) output); } -void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign) +void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign) { - if (*isign == -1) { - fft_forward_fftw(input, output, nn); - } else { - fft_backward_fftw(input, output, nn); - } + if (*isign == -1) { + fft_forward_fftw(input, output, nn); + } + else { + fft_backward_fftw(input, output, nn); + } } void fftc_fftw_init_out_of_place_(void) { -//#ifdef FFTW_OMP - fftw_init_threads(); - fftw_plan_with_nthreads(omp_get_max_threads()); -//#endif +// #ifdef FFTW_OMP + fftw_init_threads(); + fftw_plan_with_nthreads(omp_get_max_threads()); +// #endif } #endif diff --git a/examples/phaseField/srcKokkosVerbose/fourier_space.cpp b/examples/phaseField/srcKokkosVerbose/fourier_space.cpp index 19be6f95..a37fa9a4 100644 --- a/examples/phaseField/srcKokkosVerbose/fourier_space.cpp +++ b/examples/phaseField/srcKokkosVerbose/fourier_space.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "fourier_space.h" @@ -20,78 +53,74 @@ FourierSpace::FourierSpace(int* nn, double* delta) #ifdef IN_PLACE_FFT kz_ = CArrayKokkos(nz_); #elif OUT_OF_PLACE_FFT - nz21_ = nz_/2 + 1; - kz_ = CArrayKokkos(nz21_); + nz21_ = nz_ / 2 + 1; + kz_ = CArrayKokkos(nz21_); #endif - // set values of kx_, ky_, and kz_ set_kx_ky_kz_(); - } - void FourierSpace::set_kx_ky_kz_() { // calculate kx_ Kokkos::parallel_for( Kokkos::RangePolicy<>(0, nx_), - KOKKOS_CLASS_LAMBDA(const int i){ + KOKKOS_CLASS_LAMBDA(const int i) { int ti; ti = i; - if (ti > nx_/2) ti = ti - nx_; + if (ti > nx_ / 2) { + ti = ti - nx_; + } kx_(i) = (float(ti) * twopi_) / (nx_ * dx_); }); - // calculate ky_ Kokkos::parallel_for( Kokkos::RangePolicy<>(0, ny_), - KOKKOS_CLASS_LAMBDA(const int j){ + KOKKOS_CLASS_LAMBDA(const int j) { int tj; tj = j; - if (tj > ny_/2) tj = tj - ny_; + if (tj > ny_ / 2) { + tj = tj - ny_; + } ky_(j) = (float(tj) * twopi_) / (ny_ * dy_); }); - // calculate kz_ for in-place-fft #ifdef IN_PLACE_FFT Kokkos::parallel_for( Kokkos::RangePolicy<>(0, nz_), - KOKKOS_CLASS_LAMBDA(const int k){ + KOKKOS_CLASS_LAMBDA(const int k) { int tk; tk = k; - if (tk > nz_/2) tk = tk - nz_; + if (tk > nz_ / 2) { + tk = tk - nz_; + } kz_(k) = (float(tk) * twopi_) / (nz_ * dz_); }); #elif OUT_OF_PLACE_FFT Kokkos::parallel_for( Kokkos::RangePolicy<>(0, nz21_), - KOKKOS_CLASS_LAMBDA(const int k){ + KOKKOS_CLASS_LAMBDA(const int k) { int tk; - tk = k; + tk = k; kz_(k) = (float(tk) * twopi_) / (nz_ * dz_); - }); + }); #endif } - CArrayKokkos& FourierSpace::get_kx() { return kx_; } - CArrayKokkos& FourierSpace::get_ky() { return ky_; } - CArrayKokkos& FourierSpace::get_kz() { return kz_; } - - diff --git a/examples/phaseField/srcKokkosVerbose/fourier_space.h b/examples/phaseField/srcKokkosVerbose/fourier_space.h index a437c53a..6030fc6b 100644 --- a/examples/phaseField/srcKokkosVerbose/fourier_space.h +++ b/examples/phaseField/srcKokkosVerbose/fourier_space.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "matar.h" @@ -6,29 +39,29 @@ using namespace mtr; // matar namespace class FourierSpace { - private: - int* nn_; - int nx_; - int ny_; - int nz_; +private: + int* nn_; + int nx_; + int ny_; + int nz_; #ifdef OUT_OF_PLACE_FFT - int nz21_; + int nz21_; #endif - double* delta_; - double dx_; - double dy_; - double dz_; - const double pi_ = 3.141592653589793238463; - const double twopi_ = 2.0*pi_; - CArrayKokkos kx_; - CArrayKokkos ky_; - CArrayKokkos kz_; - - public: - FourierSpace(int* nn, double* delta); - CArrayKokkos& get_kx(); - CArrayKokkos& get_ky(); - CArrayKokkos& get_kz(); + double* delta_; + double dx_; + double dy_; + double dz_; + const double pi_ = 3.141592653589793238463; + const double twopi_ = 2.0 * pi_; + CArrayKokkos kx_; + CArrayKokkos ky_; + CArrayKokkos kz_; - void set_kx_ky_kz_(); +public: + FourierSpace(int* nn, double* delta); + CArrayKokkos& get_kx(); + CArrayKokkos& get_ky(); + CArrayKokkos& get_kz(); + + void set_kx_ky_kz_(); }; diff --git a/examples/phaseField/srcKokkosVerbose/global_arrays.cpp b/examples/phaseField/srcKokkosVerbose/global_arrays.cpp index 672d4d74..2029ebb9 100644 --- a/examples/phaseField/srcKokkosVerbose/global_arrays.cpp +++ b/examples/phaseField/srcKokkosVerbose/global_arrays.cpp @@ -1,7 +1,40 @@ -#include "global_arrays.h" - -GlobalArrays::GlobalArrays(int* nn) -{ - this->comp = DCArrayKokkos(nn[0], nn[1], nn[2]); - this->dfdc = CArrayKokkos(nn[0], nn[1], nn[2]); -} +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#include "global_arrays.h" + +GlobalArrays::GlobalArrays(int* nn) +{ + this->comp = DCArrayKokkos(nn[0], nn[1], nn[2]); + this->dfdc = CArrayKokkos(nn[0], nn[1], nn[2]); +} diff --git a/examples/phaseField/srcKokkosVerbose/global_arrays.h b/examples/phaseField/srcKokkosVerbose/global_arrays.h index 3ce7e3b6..41ba3833 100644 --- a/examples/phaseField/srcKokkosVerbose/global_arrays.h +++ b/examples/phaseField/srcKokkosVerbose/global_arrays.h @@ -1,12 +1,45 @@ -#pragma once -#include "matar.h" - -using namespace mtr; // matar namespace - -struct GlobalArrays -{ - DCArrayKokkos comp; - CArrayKokkos dfdc; - - GlobalArrays(int* nn); -}; +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once +#include "matar.h" + +using namespace mtr; // matar namespace + +struct GlobalArrays +{ + DCArrayKokkos comp; + CArrayKokkos dfdc; + + GlobalArrays(int* nn); +}; diff --git a/examples/phaseField/srcKokkosVerbose/initialize_comp.cpp b/examples/phaseField/srcKokkosVerbose/initialize_comp.cpp index 3a030cdb..dc3a6870 100644 --- a/examples/phaseField/srcKokkosVerbose/initialize_comp.cpp +++ b/examples/phaseField/srcKokkosVerbose/initialize_comp.cpp @@ -1,15 +1,47 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "initialize_comp.h" - -void initialize_comp(SimParameters& sp, DCArrayKokkos &comp) +void initialize_comp(SimParameters& sp, DCArrayKokkos& comp) { - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function - int nx = sp.nn[0]; - int ny = sp.nn[1]; - int nz = sp.nn[2]; - int iseed = sp.iseed; + int nx = sp.nn[0]; + int ny = sp.nn[1]; + int nz = sp.nn[2]; + int iseed = sp.iseed; double c0 = sp.c0; double noise = sp.noise; @@ -23,10 +55,10 @@ void initialize_comp(SimParameters& sp, DCArrayKokkos &comp) for (int j = 0; j < ny; ++j) { for (int k = 0; k < nz; ++k) { // random number between 0.0 and 1.0 - r = (double) rand()/RAND_MAX; + r = (double) rand() / RAND_MAX; // initialize "comp" with stochastic thermal fluctuations - comp.host(i,j,k) = c0 + (2.0*r - 1.0)*noise; + comp.host(i, j, k) = c0 + (2.0 * r - 1.0) * noise; } } } diff --git a/examples/phaseField/srcKokkosVerbose/initialize_comp.h b/examples/phaseField/srcKokkosVerbose/initialize_comp.h index f1660806..a308feb4 100644 --- a/examples/phaseField/srcKokkosVerbose/initialize_comp.h +++ b/examples/phaseField/srcKokkosVerbose/initialize_comp.h @@ -1,8 +1,41 @@ -#pragma once - -#include "sim_parameters.h" -#include "matar.h" - -using namespace mtr; // matar namespace - -void initialize_comp(SimParameters& sp, DCArrayKokkos &comp); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once + +#include "sim_parameters.h" +#include "matar.h" + +using namespace mtr; // matar namespace + +void initialize_comp(SimParameters& sp, DCArrayKokkos& comp); diff --git a/examples/phaseField/srcKokkosVerbose/local_free_energy.cpp b/examples/phaseField/srcKokkosVerbose/local_free_energy.cpp index e0dc8aba..36d2b061 100644 --- a/examples/phaseField/srcKokkosVerbose/local_free_energy.cpp +++ b/examples/phaseField/srcKokkosVerbose/local_free_energy.cpp @@ -1,53 +1,84 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include "local_free_energy.h" - -double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos &comp) +double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos& comp) { // this function calculates the total free energy of the system. - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; + int nx = nn[0]; + int ny = nn[1]; + int nz = nn[2]; double dx = delta[0]; double dy = delta[1]; double dz = delta[2]; - // + // double total_energy = 0.0; double loc_sum = 0.0; Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({1,1,1}, {nx-1, ny-1, nz-1}), - KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_sum){ - // central difference spatial derivative of comp - double dcdx = (comp(i+1,j,k) - comp(i-1,j,k)) / (2.0 * dx); - double dcdy = (comp(i,j+1,k) - comp(i,j-1,k)) / (2.0 * dy); - double dcdz = (comp(i,j,k+1) - comp(i,j,k-1)) / (2.0 * dz); - loc_sum += comp(i,j,k) * comp(i,j,k) * (1.0 - comp(i,j,k)) * (1.0 - comp(i,j,k)) - + 0.5 * kappa * (dcdx * dcdx + dcdy * dcdy + dcdz * dcdz); + Kokkos::MDRangePolicy>({ 1, 1, 1 }, { nx - 1, ny - 1, nz - 1 }), + KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_sum) { + // central difference spatial derivative of comp + double dcdx = (comp(i + 1, j, k) - comp(i - 1, j, k)) / (2.0 * dx); + double dcdy = (comp(i, j + 1, k) - comp(i, j - 1, k)) / (2.0 * dy); + double dcdz = (comp(i, j, k + 1) - comp(i, j, k - 1)) / (2.0 * dz); + loc_sum += comp(i, j, k) * comp(i, j, k) * (1.0 - comp(i, j, k)) * (1.0 - comp(i, j, k)) + + 0.5 * kappa * (dcdx * dcdx + dcdy * dcdy + dcdz * dcdz); }, total_energy); return total_energy; } -void calculate_dfdc(int* nn, DCArrayKokkos &comp, CArrayKokkos &dfdc) +void calculate_dfdc(int* nn, DCArrayKokkos& comp, CArrayKokkos& dfdc) { - // this function calculates the derivitive of local free energy density (f) + // this function calculates the derivitive of local free energy density (f) // with respect to composition (c) (df/dc). - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; - + Kokkos::parallel_for( - Kokkos::MDRangePolicy>({0,0,0}, {nx, ny, nz}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - dfdc(i,j,k) = 4.0 * comp(i,j,k) * comp(i,j,k) * comp(i,j,k) - - 6.0 * comp(i,j,k) * comp(i,j,k) - + 2.0 * comp(i,j,k); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx, ny, nz }), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + dfdc(i, j, k) = 4.0 * comp(i, j, k) * comp(i, j, k) * comp(i, j, k) + - 6.0 * comp(i, j, k) * comp(i, j, k) + + 2.0 * comp(i, j, k); }); - } diff --git a/examples/phaseField/srcKokkosVerbose/local_free_energy.h b/examples/phaseField/srcKokkosVerbose/local_free_energy.h index e4d75420..acc96f34 100644 --- a/examples/phaseField/srcKokkosVerbose/local_free_energy.h +++ b/examples/phaseField/srcKokkosVerbose/local_free_energy.h @@ -1,8 +1,41 @@ -#pragma once -#include "matar.h" - -using namespace mtr; // matar namespace - -double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos &comp); - -void calculate_dfdc(int* nn, DCArrayKokkos &comp, CArrayKokkos &dfdc); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once +#include "matar.h" + +using namespace mtr; // matar namespace + +double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos& comp); + +void calculate_dfdc(int* nn, DCArrayKokkos& comp, CArrayKokkos& dfdc); diff --git a/examples/phaseField/srcKokkosVerbose/main.cpp b/examples/phaseField/srcKokkosVerbose/main.cpp index 2ddf4f9e..9f66f625 100644 --- a/examples/phaseField/srcKokkosVerbose/main.cpp +++ b/examples/phaseField/srcKokkosVerbose/main.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -9,55 +42,50 @@ #include "local_free_energy.h" #include "outputs.h" - int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); { + // simulation parameters + SimParameters sp; + sp.print(); - // simulation parameters - SimParameters sp; - sp.print(); - - // global arrays needed for simulation - GlobalArrays ga = GlobalArrays(sp.nn); - - // setup initial composition profile - initialize_comp(sp, ga.comp); + // global arrays needed for simulation + GlobalArrays ga = GlobalArrays(sp.nn); - // initialize solver - CHFourierSpectralSolver CH_fss(sp); + // setup initial composition profile + initialize_comp(sp, ga.comp); - // Start measuring time - auto begin = std::chrono::high_resolution_clock::now(); + // initialize solver + CHFourierSpectralSolver CH_fss(sp); - // time stepping loop - for (int iter = 1; iter <= sp.num_steps; iter++) { - // calculate df/dc - calculate_dfdc(sp.nn, ga.comp, ga.dfdc); + // Start measuring time + auto begin = std::chrono::high_resolution_clock::now(); - // Cahn Hilliard equation solver - CH_fss.time_march(ga.comp, ga.dfdc); + // time stepping loop + for (int iter = 1; iter <= sp.num_steps; iter++) { + // calculate df/dc + calculate_dfdc(sp.nn, ga.comp, ga.dfdc); - // report simulation progress and output vtk files - if (iter % sp.print_rate == 0) { + // Cahn Hilliard equation solver + CH_fss.time_march(ga.comp, ga.dfdc); - track_progress(iter, sp.nn, ga.comp); + // report simulation progress and output vtk files + if (iter % sp.print_rate == 0) { + track_progress(iter, sp.nn, ga.comp); - write_vtk(iter, sp.nn, sp.delta, ga.comp); + write_vtk(iter, sp.nn, sp.delta, ga.comp); - output_total_free_energy(iter, sp.print_rate, sp.num_steps, - sp.nn, sp.delta, sp.kappa, + output_total_free_energy(iter, sp.print_rate, sp.num_steps, + sp.nn, sp.delta, sp.kappa, ga.comp); + } } - } - - // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end - begin); - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - + // Stop measuring time and calculate the elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); } Kokkos::finalize(); diff --git a/examples/phaseField/srcKokkosVerbose/outputs.cpp b/examples/phaseField/srcKokkosVerbose/outputs.cpp index 78b9b4b0..31b4de1e 100644 --- a/examples/phaseField/srcKokkosVerbose/outputs.cpp +++ b/examples/phaseField/srcKokkosVerbose/outputs.cpp @@ -1,11 +1,44 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "outputs.h" #include "local_free_energy.h" -void track_progress(int iter, int* nn, DCArrayKokkos &comp) +void track_progress(int iter, int* nn, DCArrayKokkos& comp) { - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function int nx = nn[0]; int ny = nn[1]; @@ -13,55 +46,49 @@ void track_progress(int iter, int* nn, DCArrayKokkos &comp) // sum of comp field double sum_comp = 0.0; - double loc_sum = 0.0; + double loc_sum = 0.0; Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({0,0,0}, {nx,ny,nz}), - KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_sum){ - loc_sum += comp(i,j,k); + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx, ny, nz }), + KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_sum) { + loc_sum += comp(i, j, k); }, sum_comp); - // max of comp field double max_comp; double loc_max; Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({0,0,0}, {nx,ny,nz}), - KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_max){ - if(loc_max < comp(i,j,k)) loc_max = comp(i,j,k); - }, - Kokkos::Max(max_comp) - ); - + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx, ny, nz }), + KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_max) { + if (loc_max < comp(i, j, k)) { + loc_max = comp(i, j, k); + } + }, Kokkos::Max(max_comp)); // min of comp field double min_comp; double loc_min; Kokkos::parallel_reduce( - Kokkos::MDRangePolicy>({0,0,0}, {nx,ny,nz}), - KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_min){ - if(loc_min > comp(i,j,k)) loc_min = comp(i,j,k); - }, - Kokkos::Min(min_comp) - ); - + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { nx, ny, nz }), + KOKKOS_LAMBDA(const int i, const int j, const int k, double& loc_min) { + if (loc_min > comp(i, j, k)) { + loc_min = comp(i, j, k); + } + }, Kokkos::Min(min_comp)); printf("\n----------------------------------------------------\n"); printf("Iteration : %d\n", iter); printf("Conservation of comp : %f\n", sum_comp); printf("Max comp : %f\n", max_comp); printf("Min comp : %f\n", min_comp); - } - - -void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) +void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos& comp) { - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; + int nx = nn[0]; + int ny = nn[1]; + int nz = nn[2]; double dx = delta[0]; double dy = delta[1]; double dz = delta[2]; @@ -71,7 +98,7 @@ void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) // output file management FILE* output_file; - char filename[50]; + char filename[50]; // create name of output vtk file sprintf(filename, "outputComp_%d.vtk", iter); @@ -85,17 +112,17 @@ void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) fprintf(output_file, "%s\n", "ASCII"); fprintf(output_file, "%s\n", "DATASET STRUCTURED_GRID"); fprintf(output_file, "%s %i %i %i\n", "DIMENSIONS", nx, ny, nz); - fprintf(output_file, "%s %i %s\n", "POINTS", nx*ny*nz, "double"); + fprintf(output_file, "%s %i %s\n", "POINTS", nx * ny * nz, "double"); - // write grid point values + // write grid point values // Note: order of for loop is important (k,j,i) double x, y, z; for (int k = 0; k < nz; ++k) { for (int j = 0; j < ny; ++j) { - for (int i = 0; i < nx; ++i) { - x = double(i)*dx; - y = double(j)*dy; - z = double(k)*dz; + for (int i = 0; i < nx; ++i) { + x = double(i) * dx; + y = double(j) * dy; + z = double(k) * dz; fprintf(output_file, " %12.6E %12.6E %12.6E\n", x, y, z); } } @@ -103,36 +130,33 @@ void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) // write data values // Note: order of for loop is important (k,j,i) - fprintf(output_file, "%s %i\n", "POINT_DATA", nx*ny*nz); + fprintf(output_file, "%s %i\n", "POINT_DATA", nx * ny * nz); fprintf(output_file, "%s\n", "SCALARS data double"); fprintf(output_file, "%s\n", "LOOKUP_TABLE default"); for (int k = 0; k < nz; ++k) { for (int j = 0; j < ny; ++j) { - for (int i = 0; i < nx; ++i) { - fprintf(output_file, " %12.6E\n", comp.host(i,j,k)); + for (int i = 0; i < nx; ++i) { + fprintf(output_file, " %12.6E\n", comp.host(i, j, k)); } } } - + // close file fclose(output_file); } - - -void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, - double* delta, double kappa, DCArrayKokkos &comp) +void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, + double* delta, double kappa, DCArrayKokkos& comp) { // get total_free_energy double total_free_energy = calculate_total_free_energy(nn, delta, kappa, comp); // output file management static FILE* output_file; - static char filename[50]; + static char filename[50]; // open output vtk file - if (iter == print_rate) - { + if (iter == print_rate) { // create name of output vtk file sprintf(filename, "total_free_energy.csv"); output_file = fopen(filename, "w"); @@ -142,9 +166,7 @@ void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, fprintf(output_file, "%i,%12.6E\n", iter, total_free_energy); // close file - if (iter == num_steps) - { + if (iter == num_steps) { fclose(output_file); } } - diff --git a/examples/phaseField/srcKokkosVerbose/outputs.h b/examples/phaseField/srcKokkosVerbose/outputs.h index b1941f91..6ea7f06f 100644 --- a/examples/phaseField/srcKokkosVerbose/outputs.h +++ b/examples/phaseField/srcKokkosVerbose/outputs.h @@ -1,15 +1,48 @@ -#pragma once - -#include "matar.h" - -using namespace mtr; // matar namespace - -// function to output simulation progress -void track_progress(int iter, int* nn, DCArrayKokkos &comp); - -// function to write vtk files for visualization -void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp); - -// function to write total_free_energy to file -void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, - double* delta, double kappa, DCArrayKokkos &comp); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once + +#include "matar.h" + +using namespace mtr; // matar namespace + +// function to output simulation progress +void track_progress(int iter, int* nn, DCArrayKokkos& comp); + +// function to write vtk files for visualization +void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos& comp); + +// function to write total_free_energy to file +void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, + double* delta, double kappa, DCArrayKokkos& comp); diff --git a/examples/phaseField/srcKokkosVerbose/sim_parameters.cpp b/examples/phaseField/srcKokkosVerbose/sim_parameters.cpp index 5683167b..1dcbf6cd 100644 --- a/examples/phaseField/srcKokkosVerbose/sim_parameters.cpp +++ b/examples/phaseField/srcKokkosVerbose/sim_parameters.cpp @@ -1,56 +1,88 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include "sim_parameters.h" - SimParameters::SimParameters() { // set default simulation parameters - this->nn[0] = 32; // nx - this->nn[1] = 32; // ny - this->nn[2] = 32; // nz - this->delta[0] = 1.0; // dx - this->delta[1] = 1.0; // dy - this->delta[2] = 1.0; // dz - this->dt = 5.0E-2; // dt + this->nn[0] = 32; // nx + this->nn[1] = 32; // ny + this->nn[2] = 32; // nz + this->delta[0] = 1.0; // dx + this->delta[1] = 1.0; // dy + this->delta[2] = 1.0; // dz + this->dt = 5.0E-2; // dt this->num_steps = 1000; // total number of time steps this->print_rate = 100; // time step interval for output file - this->iseed = 456; // random number seed - this->kappa = 1.0; // gradient energy coefficient - this->M = 1.0; // mobility - this->c0 = 5.0E-1; // critical composition - this->noise = 5.0E-3; // noise term for thermal fluctuations + this->iseed = 456; // random number seed + this->kappa = 1.0; // gradient energy coefficient + this->M = 1.0; // mobility + this->c0 = 5.0E-1; // critical composition + this->noise = 5.0E-3; // noise term for thermal fluctuations // set number of dimensions set_ndim(); } - void SimParameters::set_ndim() { ndim = 0; for (int i = 0; i < 3; i++) { - if (nn[i] > 1) ++ndim; + if (nn[i] > 1) { + ++ndim; + } } } - void SimParameters::print() { - std::cout << " nx = " << nn[0] << std::endl; - std::cout << " ny = " << nn[1] << std::endl; - std::cout << " nz = " << nn[2] << std::endl; - std::cout << " dx = " << delta[0] << std::endl; - std::cout << " dy = " << delta[1] << std::endl; - std::cout << " dz = " << delta[2] << std::endl; - std::cout << " dt = " << dt << std::endl; - std::cout << " num_steps = " << num_steps << std::endl; - std::cout << " print_rate = " << print_rate << std::endl; - std::cout << " iseed = " << iseed << std::endl; - std::cout << " kappa = " << kappa << std::endl; - std::cout << " M = " << M << std::endl; - std::cout << " c0 = " << c0 << std::endl; - std::cout << " noise = " << noise << std::endl; + std::cout << " nx = " << nn[0] << std::endl; + std::cout << " ny = " << nn[1] << std::endl; + std::cout << " nz = " << nn[2] << std::endl; + std::cout << " dx = " << delta[0] << std::endl; + std::cout << " dy = " << delta[1] << std::endl; + std::cout << " dz = " << delta[2] << std::endl; + std::cout << " dt = " << dt << std::endl; + std::cout << " num_steps = " << num_steps << std::endl; + std::cout << " print_rate = " << print_rate << std::endl; + std::cout << " iseed = " << iseed << std::endl; + std::cout << " kappa = " << kappa << std::endl; + std::cout << " M = " << M << std::endl; + std::cout << " c0 = " << c0 << std::endl; + std::cout << " noise = " << noise << std::endl; } diff --git a/examples/phaseField/srcKokkosVerbose/sim_parameters.h b/examples/phaseField/srcKokkosVerbose/sim_parameters.h index 9c694bf8..a89e4e23 100644 --- a/examples/phaseField/srcKokkosVerbose/sim_parameters.h +++ b/examples/phaseField/srcKokkosVerbose/sim_parameters.h @@ -1,21 +1,54 @@ -#pragma once - -struct SimParameters -{ - int nn[3]; - int ndim; - int num_steps; - int print_rate; - int iseed; - double dx; - double delta[3]; - double dt; - double kappa; - double M; - double c0; - double noise; - - SimParameters(); - void set_ndim(); - void print(); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once + +struct SimParameters +{ + int nn[3]; + int ndim; + int num_steps; + int print_rate; + int iseed; + double dx; + double delta[3]; + double dt; + double kappa; + double M; + double c0; + double noise; + + SimParameters(); + void set_ndim(); + void print(); }; \ No newline at end of file diff --git a/examples/phaseField/srcMacros/CH_fourier_spectral_solver.cpp b/examples/phaseField/srcMacros/CH_fourier_spectral_solver.cpp index b089955f..23d7d6e8 100644 --- a/examples/phaseField/srcMacros/CH_fourier_spectral_solver.cpp +++ b/examples/phaseField/srcMacros/CH_fourier_spectral_solver.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include "CH_fourier_spectral_solver.h" @@ -8,7 +41,7 @@ #include "fft_manager_out_of_place.h" #endif -CHFourierSpectralSolver::CHFourierSpectralSolver(SimParameters &sp) +CHFourierSpectralSolver::CHFourierSpectralSolver(SimParameters& sp) { // set simulation parameters nn_ = sp.nn; @@ -30,25 +63,23 @@ CHFourierSpectralSolver::CHFourierSpectralSolver(SimParameters &sp) #ifdef IN_PLACE_FFT nn_img_[2] = nz_; #elif OUT_OF_PLACE_FFT - nz21_ = nz_/2 + 1; + nz21_ = nz_ / 2 + 1; nn_img_[2] = nz21_; #endif // initialize arrays needed for simulation - comp_img_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2], 2); - dfdc_img_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2], 2); - kpow2_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2]); - denominator_ = CArrayKokkos (nn_img_[0], nn_img_[1], nn_img_[2]); + comp_img_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2], 2); + dfdc_img_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2], 2); + kpow2_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2]); + denominator_ = CArrayKokkos(nn_img_[0], nn_img_[1], nn_img_[2]); // set values of kpow2_ set_kpow2_(); // set values of denominator_ set_denominator_(); - } - void CHFourierSpectralSolver::set_kpow2_() { // get fourier space @@ -56,30 +87,28 @@ void CHFourierSpectralSolver::set_kpow2_() auto kx = fs.get_kx(); auto ky = fs.get_ky(); auto kz = fs.get_kz(); - + // calculate kpow2_ FOR_ALL_CLASS(i, 0, nn_img_[0], j, 0, nn_img_[1], k, 0, nn_img_[2], { - kpow2_(i,j,k) = kx(i) * kx(i) - + ky(j) * ky(j) - + kz(k) * kz(k); + kpow2_(i, j, k) = kx(i) * kx(i) + + ky(j) * ky(j) + + kz(k) * kz(k); }); } - void CHFourierSpectralSolver::set_denominator_() { // calculate denominator_ FOR_ALL_CLASS(i, 0, nn_img_[0], j, 0, nn_img_[1], k, 0, nn_img_[2], { - denominator_(i,j,k) = 1.0 + (dt_ * M_ * kappa_ * kpow2_(i,j,k) * kpow2_(i,j,k)); + denominator_(i, j, k) = 1.0 + (dt_ * M_ * kappa_ * kpow2_(i, j, k) * kpow2_(i, j, k)); }); } - -void CHFourierSpectralSolver::time_march(DCArrayKokkos &comp, CArrayKokkos &dfdc) +void CHFourierSpectralSolver::time_march(DCArrayKokkos& comp, CArrayKokkos& dfdc) { // initialize fft manager #ifdef IN_PLACE_FFT @@ -87,7 +116,7 @@ void CHFourierSpectralSolver::time_march(DCArrayKokkos &comp, CArrayKokk #elif OUT_OF_PLACE_FFT static FFTManagerOutOfPlace fft_manager = FFTManagerOutOfPlace(nn_); #endif - + // get foward fft of comp fft_manager.perform_forward_fft(comp.device_pointer(), comp_img_.pointer()); @@ -98,21 +127,20 @@ void CHFourierSpectralSolver::time_march(DCArrayKokkos &comp, CArrayKokk FOR_ALL_CLASS(i, 0, nn_img_[0], j, 0, nn_img_[1], k, 0, nn_img_[2], { - comp_img_(i,j,k,0) = (comp_img_(i,j,k,0) - (dt_ * M_ * kpow2_(i,j,k)) * dfdc_img_(i,j,k,0)) - / (denominator_(i,j,k)); - - comp_img_(i,j,k,1) = (comp_img_(i,j,k,1) - (dt_ * M_ * kpow2_(i,j,k)) * dfdc_img_(i,j,k,1)) - / (denominator_(i,j,k)); + comp_img_(i, j, k, 0) = (comp_img_(i, j, k, 0) - (dt_ * M_ * kpow2_(i, j, k)) * dfdc_img_(i, j, k, 0)) + / (denominator_(i, j, k)); + + comp_img_(i, j, k, 1) = (comp_img_(i, j, k, 1) - (dt_ * M_ * kpow2_(i, j, k)) * dfdc_img_(i, j, k, 1)) + / (denominator_(i, j, k)); }); - + // get backward fft of comp_img fft_manager.perform_backward_fft(comp_img_.pointer(), comp.device_pointer()); // normalize after inverse fft - FOR_ALL_CLASS(i, 0, nx_, + FOR_ALL_CLASS(i, 0, nx_, j, 0, ny_, - k, 0, nz_,{ - comp(i,j,k) = comp(i,j,k) / double(nx_ * ny_ * nz_); - }); + k, 0, nz_, { + comp(i, j, k) = comp(i, j, k) / double(nx_ * ny_ * nz_); + }); } - diff --git a/examples/phaseField/srcMacros/CH_fourier_spectral_solver.h b/examples/phaseField/srcMacros/CH_fourier_spectral_solver.h index 673cb561..3394da9d 100644 --- a/examples/phaseField/srcMacros/CH_fourier_spectral_solver.h +++ b/examples/phaseField/srcMacros/CH_fourier_spectral_solver.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "sim_parameters.h" @@ -7,34 +40,34 @@ using namespace mtr; // matar namespace class CHFourierSpectralSolver { - private: - // simulation parameters - int* nn_; - int nn_img_[3]; - int nx_; - int ny_; - int nz_; +private: + // simulation parameters + int* nn_; + int nn_img_[3]; + int nx_; + int ny_; + int nz_; #ifdef OUT_OF_PLACE_FFT - int nz21_; + int nz21_; #endif - int ndim_; - double* delta_; - double dx_; - double dy_; - double dz_; - double dt_; - double M_; - double kappa_; - - // arrays needed by solver - CArrayKokkos comp_img_; - CArrayKokkos dfdc_img_; - CArrayKokkos kpow2_; - CArrayKokkos denominator_; - - public: - CHFourierSpectralSolver(SimParameters &sp); - void set_kpow2_(); - void set_denominator_(); - void time_march(DCArrayKokkos &comp, CArrayKokkos &dfdc); + int ndim_; + double* delta_; + double dx_; + double dy_; + double dz_; + double dt_; + double M_; + double kappa_; + + // arrays needed by solver + CArrayKokkos comp_img_; + CArrayKokkos dfdc_img_; + CArrayKokkos kpow2_; + CArrayKokkos denominator_; + +public: + CHFourierSpectralSolver(SimParameters& sp); + void set_kpow2_(); + void set_denominator_(); + void time_march(DCArrayKokkos& comp, CArrayKokkos& dfdc); }; diff --git a/examples/phaseField/srcMacros/fft_manager_in_place.cpp b/examples/phaseField/srcMacros/fft_manager_in_place.cpp index ab9ef533..9618e0b1 100644 --- a/examples/phaseField/srcMacros/fft_manager_in_place.cpp +++ b/examples/phaseField/srcMacros/fft_manager_in_place.cpp @@ -1,106 +1,134 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT # include "fft_manager_in_place.h" - -FFTManagerInPlace::FFTManagerInPlace(int * nn) +FFTManagerInPlace::FFTManagerInPlace(int* nn) { - nn_ = nn; - nx_ = nn_[0]; - ny_ = nn_[1]; - nz_ = nn_[2]; + nn_ = nn; + nx_ = nn_[0]; + ny_ = nn_[1]; + nz_ = nn_[2]; data_ = CArrayKokkos(nx_, ny_, nz_, 2); // calculate ndim ndim_ = 0; for (int i = 0; i < 3; i++) { - if (nn_[i] > 1) ++ndim_; + if (nn_[i] > 1) { + ++ndim_; + } } // initialize fft - #ifdef HAVE_CUDA - fftc_cufft_init_in_place_(); + #ifdef HAVE_CUDA + fftc_cufft_init_in_place_(); #else - fftc_fftw_init_in_place_(); + fftc_fftw_init_in_place_(); #endif } - -void FFTManagerInPlace::prep_for_forward_fft_(double *input) +void FFTManagerInPlace::prep_for_forward_fft_(double* input) { // this function writes the data in "input" array to "data_" array // in order to ready "data_" for in-place forward fft. // create view of input - auto input_view = ViewCArrayKokkos (input, nx_, ny_, nz_); - + auto input_view = ViewCArrayKokkos(input, nx_, ny_, nz_); // write input to data for in-place forward fft FOR_ALL_CLASS(i, 0, nx_, j, 0, ny_, k, 0, nz_, { - data_(i,j,k,0) = input_view(i,j,k); - data_(i,j,k,1) = 0.0; + data_(i, j, k, 0) = input_view(i, j, k); + data_(i, j, k, 1) = 0.0; }); } - -void FFTManagerInPlace::get_forward_fft_result_(double *output) +void FFTManagerInPlace::get_forward_fft_result_(double* output) { // this function writes the result of in-place forward fft // in "data_" array into "output" array. // create view of output - auto output_view = ViewCArrayKokkos (output, nx_, ny_, nz_, 2); + auto output_view = ViewCArrayKokkos(output, nx_, ny_, nz_, 2); // write data to output after in-place fft FOR_ALL_CLASS(i, 0, nx_, j, 0, ny_, k, 0, nz_, { - output_view(i,j,k,0) = data_(i,j,k,0); - output_view(i,j,k,1) = data_(i,j,k,1); + output_view(i, j, k, 0) = data_(i, j, k, 0); + output_view(i, j, k, 1) = data_(i, j, k, 1); }); } - -void FFTManagerInPlace::prep_for_backward_fft_(double *input) +void FFTManagerInPlace::prep_for_backward_fft_(double* input) { // this function writes the data in "input" array to "data_" array // in order to ready "data_" for in-place backward fft. // create view of input - auto input_view = ViewCArrayKokkos (input, nx_, ny_, nz_, 2); + auto input_view = ViewCArrayKokkos(input, nx_, ny_, nz_, 2); // write input to data for in-place fft FOR_ALL_CLASS(i, 0, nx_, j, 0, ny_, k, 0, nz_, { - data_(i,j,k,0) = input_view(i,j,k,0); - data_(i,j,k,1) = input_view(i,j,k,1); + data_(i, j, k, 0) = input_view(i, j, k, 0); + data_(i, j, k, 1) = input_view(i, j, k, 1); }); } - -void FFTManagerInPlace::get_backward_fft_result_(double *output) +void FFTManagerInPlace::get_backward_fft_result_(double* output) { // this function writes the result of in-place backward fft // in "data_" array into "output" array. // create view of output - auto output_view = ViewCArrayKokkos (output, nx_, ny_, nz_); + auto output_view = ViewCArrayKokkos(output, nx_, ny_, nz_); // write data to output after in-place fft FOR_ALL_CLASS(i, 0, nx_, j, 0, ny_, k, 0, nz_, { - output_view(i,j,k) = data_(i,j,k,0); + output_view(i, j, k) = data_(i, j, k, 0); }); } - -void FFTManagerInPlace::perform_forward_fft(double *input, double *output) +void FFTManagerInPlace::perform_forward_fft(double* input, double* output) { - // this function performs forward fft on "input" array and + // this function performs forward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the forward in-place fft // either using OPENMP or CUDA. @@ -111,19 +139,18 @@ void FFTManagerInPlace::perform_forward_fft(double *input, double *output) // perform foward fft isign_ = -1; #ifdef HAVE_CUDA - fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #else - fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #endif // get result after performing foward fft get_forward_fft_result_(output); } - -void FFTManagerInPlace::perform_backward_fft(double *input, double *output) +void FFTManagerInPlace::perform_backward_fft(double* input, double* output) { - // this function performs backward fft on "input" array and + // this function performs backward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the backward in-place fft // either using OPENMP or CUDA. @@ -134,9 +161,9 @@ void FFTManagerInPlace::perform_backward_fft(double *input, double *output) // perform backward fft isign_ = 1; #ifdef HAVE_CUDA - fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_cufft_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #else - fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); + fftc_fftw_in_place_(data_.pointer(), nn_, &ndim_, &isign_); #endif // get result after performing backward fft @@ -144,6 +171,3 @@ void FFTManagerInPlace::perform_backward_fft(double *input, double *output) } #endif - - - diff --git a/examples/phaseField/srcMacros/fft_manager_in_place.h b/examples/phaseField/srcMacros/fft_manager_in_place.h index 5c0d6e63..30192562 100644 --- a/examples/phaseField/srcMacros/fft_manager_in_place.h +++ b/examples/phaseField/srcMacros/fft_manager_in_place.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT #pragma once @@ -8,35 +41,32 @@ using namespace mtr; // matar namespace class FFTManagerInPlace { - private: - int* nn_; - int nx_; - int ny_; - int nz_; - int ndim_; - int isign_; - CArrayKokkos data_; - - public: - FFTManagerInPlace(int * nn); - void perform_forward_fft(double *input, double *output); - void perform_backward_fft(double *input, double *output); - - void prep_for_forward_fft_(double *input); - void get_forward_fft_result_(double *output); - void prep_for_backward_fft_(double *input); - void get_backward_fft_result_(double *output); +private: + int* nn_; + int nx_; + int ny_; + int nz_; + int ndim_; + int isign_; + CArrayKokkos data_; -}; +public: + FFTManagerInPlace(int* nn); + void perform_forward_fft(double* input, double* output); + void perform_backward_fft(double* input, double* output); + void prep_for_forward_fft_(double* input); + void get_forward_fft_result_(double* output); + void prep_for_backward_fft_(double* input); + void get_backward_fft_result_(double* output); +}; #ifdef HAVE_CUDA void fftc_cufft_init_in_place_(); -void fftc_cufft_in_place_(double data[], int nn[], int *ndim, int *isign); +void fftc_cufft_in_place_(double data[], int nn[], int* ndim, int* isign); #else void fftc_fftw_init_in_place_(); -void fftc_fftw_in_place_(double data[], int nn[], int *ndim, int *isign); +void fftc_fftw_in_place_(double data[], int nn[], int* ndim, int* isign); #endif - #endif diff --git a/examples/phaseField/srcMacros/fft_manager_out_of_place.cpp b/examples/phaseField/srcMacros/fft_manager_out_of_place.cpp index 438ca832..54438e14 100644 --- a/examples/phaseField/srcMacros/fft_manager_out_of_place.cpp +++ b/examples/phaseField/srcMacros/fft_manager_out_of_place.cpp @@ -1,29 +1,59 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #include "fft_manager_out_of_place.h" -FFTManagerOutOfPlace::FFTManagerOutOfPlace(int * nn) +FFTManagerOutOfPlace::FFTManagerOutOfPlace(int* nn) { - nn_ = nn; - nx_ = nn_[0]; - ny_ = nn_[1]; - nz_ = nn_[2]; - nz21_ = nz_/2 + 1; + nn_ = nn; + nx_ = nn_[0]; + ny_ = nn_[1]; + nz_ = nn_[2]; + nz21_ = nz_ / 2 + 1; // initialize fft - #ifdef HAVE_CUDA - fftc_cufft_init_out_of_place_(); + #ifdef HAVE_CUDA + fftc_cufft_init_out_of_place_(); #else - fftc_fftw_init_out_of_place_(); + fftc_fftw_init_out_of_place_(); #endif } - - - -void FFTManagerOutOfPlace::perform_forward_fft(double *input, double *output) +void FFTManagerOutOfPlace::perform_forward_fft(double* input, double* output) { - // this function performs forward fft on "input" array and + // this function performs forward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the forward out-of-place fft // either using OPENMP or CUDA. @@ -31,16 +61,15 @@ void FFTManagerOutOfPlace::perform_forward_fft(double *input, double *output) // perform foward fft isign_ = -1; #ifdef HAVE_CUDA - fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); #else - fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); #endif - } -void FFTManagerOutOfPlace::perform_backward_fft(double *input, double *output) +void FFTManagerOutOfPlace::perform_backward_fft(double* input, double* output) { - // this function performs backward fft on "input" array and + // this function performs backward fft on "input" array and // writes the result to "output" array. // it calls the appropriate function to perform the backward out-of-place fft // either using OPENMP or CUDA. @@ -48,12 +77,10 @@ void FFTManagerOutOfPlace::perform_backward_fft(double *input, double *output) // perform backward fft isign_ = 1; #ifdef HAVE_CUDA - fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_cufft_out_of_place_(input, output, nn_, &ndim_, &isign_); #else - fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); + fftc_fftw_out_of_place_(input, output, nn_, &ndim_, &isign_); #endif } - - #endif diff --git a/examples/phaseField/srcMacros/fft_manager_out_of_place.h b/examples/phaseField/srcMacros/fft_manager_out_of_place.h index 1ef3f03d..31bd3fed 100644 --- a/examples/phaseField/srcMacros/fft_manager_out_of_place.h +++ b/examples/phaseField/srcMacros/fft_manager_out_of_place.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #pragma once @@ -8,31 +41,27 @@ using namespace mtr; // matar namespace class FFTManagerOutOfPlace { - private: - int* nn_; - int nx_; - int ny_; - int nz_; - int nz21_; - int ndim_; - int isign_; - - public: - FFTManagerOutOfPlace(int * nn); - void perform_forward_fft(double *input, double *output); - void perform_backward_fft(double *input, double *output); +private: + int* nn_; + int nx_; + int ny_; + int nz_; + int nz21_; + int ndim_; + int isign_; +public: + FFTManagerOutOfPlace(int* nn); + void perform_forward_fft(double* input, double* output); + void perform_backward_fft(double* input, double* output); }; - - #ifdef HAVE_CUDA void fftc_cufft_init_out_of_place_(); -void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign); +void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign); #else void fftc_fftw_init_out_of_place_(); -void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign); +void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign); #endif - #endif diff --git a/examples/phaseField/srcMacros/fftc_cufft_in_place.cpp b/examples/phaseField/srcMacros/fftc_cufft_in_place.cpp index 2a4aeb26..1d597025 100644 --- a/examples/phaseField/srcMacros/fftc_cufft_in_place.cpp +++ b/examples/phaseField/srcMacros/fftc_cufft_in_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT #ifdef HAVE_CUDA @@ -7,77 +40,75 @@ #include #include -#include +#include // ---------------------------------------------------------------------- // CUFFT -static void -fft_cufft_forward(double *data, int nn[3]) +static void fft_cufft_forward(double* data, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; - static cufftHandle planZ2Z; + static cufftHandle planZ2Z; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplex; - if (!planZ2Z) { - cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); - } + typedef cuDoubleComplex cufftDoubleComplex; + if (!planZ2Z) { + cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); + } -//#pragma acc data copy(data[0:batch*stride]) - { -// printf("data1 %p\n", data); -//#pragma acc host_data use_device(data) +// #pragma acc data copy(data[0:batch*stride]) { +// printf("data1 %p\n", data); +// #pragma acc host_data use_device(data) + { // printf("data2 %p\n", data); - rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex *) data, - (cufftDoubleComplex *) data, - CUFFT_FORWARD); - assert(rc == CUFFT_SUCCESS); + rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex*) data, + (cufftDoubleComplex*) data, + CUFFT_FORWARD); + assert(rc == CUFFT_SUCCESS); + } } - } } -static void -fft_cufft_backward(double *data, int nn[3]) +static void fft_cufft_backward(double* data, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; - - static cufftHandle planZ2Z; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; + + static cufftHandle planZ2Z; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplex; - if (!planZ2Z) { - cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); - } + typedef cuDoubleComplex cufftDoubleComplex; + if (!planZ2Z) { + cufftPlan3d(&planZ2Z, nn[0], nn[1], nn[2], CUFFT_Z2Z); + } -//#pragma acc data copy(data[0:batch*stride]) - { -//#pragma acc host_data use_device(data) - rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex *) data, - (cufftDoubleComplex *) data, - CUFFT_INVERSE); - assert(rc == CUFFT_SUCCESS); - } +// #pragma acc data copy(data[0:batch*stride]) + { +// #pragma acc host_data use_device(data) + rc = cufftExecZ2Z(planZ2Z, (cufftDoubleComplex*) data, + (cufftDoubleComplex*) data, + CUFFT_INVERSE); + assert(rc == CUFFT_SUCCESS); + } } // ---------------------------------------------------------------------- -void fftc_cufft_in_place_(double data[], int nn[], int *ndim, int *isign) +void fftc_cufft_in_place_(double data[], int nn[], int* ndim, int* isign) { - //assert(*ndim == 3); - if (*isign == -1) { - fft_cufft_forward(data, nn); - } else { - fft_cufft_backward(data, nn); - } + // assert(*ndim == 3); + if (*isign == -1) { + fft_cufft_forward(data, nn); + } + else { + fft_cufft_backward(data, nn); + } } void fftc_cufft_init_in_place_(void) { } - #endif #endif diff --git a/examples/phaseField/srcMacros/fftc_cufft_out_of_place.cpp b/examples/phaseField/srcMacros/fftc_cufft_out_of_place.cpp index 06fcfeb5..0bef1662 100644 --- a/examples/phaseField/srcMacros/fftc_cufft_out_of_place.cpp +++ b/examples/phaseField/srcMacros/fftc_cufft_out_of_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #ifdef HAVE_CUDA @@ -7,77 +40,75 @@ #include #include -#include +#include // ---------------------------------------------------------------------- // CUFFT -static void -fft_cufft_forward(double *input, double *output, int nn[3]) +static void fft_cufft_forward(double* input, double* output, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; - static cufftHandle planD2Z; + static cufftHandle planD2Z; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplexi; - typedef double cufftDoubleReal; - if (!planD2Z) { - cufftPlan3d(&planD2Z, nn[0], nn[1], nn[2], CUFFT_D2Z); - } + typedef cuDoubleComplex cufftDoubleComplexi; + typedef double cufftDoubleReal; + if (!planD2Z) { + cufftPlan3d(&planD2Z, nn[0], nn[1], nn[2], CUFFT_D2Z); + } -//#pragma acc data copy(data[0:batch*stride]) - { -// printf("data1 %p\n", data); -//#pragma acc host_data use_device(data) +// #pragma acc data copy(data[0:batch*stride]) { +// printf("data1 %p\n", data); +// #pragma acc host_data use_device(data) + { // printf("data2 %p\n", data); - rc = cufftExecD2Z(planD2Z, (cufftDoubleReal *) input, - (cufftDoubleComplex *) output); - assert(rc == CUFFT_SUCCESS); + rc = cufftExecD2Z(planD2Z, (cufftDoubleReal*) input, + (cufftDoubleComplex*) output); + assert(rc == CUFFT_SUCCESS); + } } - } } -static void -fft_cufft_backward(double *input, double *output, int nn[3]) +static void fft_cufft_backward(double* input, double* output, int nn[3]) { - int stride = 2 * nn[0] * nn[1] * nn[2]; - int rc, i; - - static cufftHandle planZ2D; + int stride = 2 * nn[0] * nn[1] * nn[2]; + int rc, i; + + static cufftHandle planZ2D; // typedef cuComplex cufftComplex; - typedef cuDoubleComplex cufftDoubleComplex; - typedef double cufftDoubleReal; - if (!planZ2D) { - cufftPlan3d(&planZ2D, nn[0], nn[1], nn[2], CUFFT_Z2D); - } + typedef cuDoubleComplex cufftDoubleComplex; + typedef double cufftDoubleReal; + if (!planZ2D) { + cufftPlan3d(&planZ2D, nn[0], nn[1], nn[2], CUFFT_Z2D); + } -//#pragma acc data copy(data[0:batch*stride]) - { -//#pragma acc host_data use_device(data) - rc = cufftExecZ2D(planZ2D, (cufftDoubleComplex *) input, - (cufftDoubleReal *) output); - assert(rc == CUFFT_SUCCESS); - } +// #pragma acc data copy(data[0:batch*stride]) + { +// #pragma acc host_data use_device(data) + rc = cufftExecZ2D(planZ2D, (cufftDoubleComplex*) input, + (cufftDoubleReal*) output); + assert(rc == CUFFT_SUCCESS); + } } // ---------------------------------------------------------------------- -void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign) +void fftc_cufft_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign) { - //assert(*ndim == 3); - if (*isign == -1) { - fft_cufft_forward(input, output, nn); - } else { - fft_cufft_backward(input, output, nn); - } + // assert(*ndim == 3); + if (*isign == -1) { + fft_cufft_forward(input, output, nn); + } + else { + fft_cufft_backward(input, output, nn); + } } void fftc_cufft_init_out_of_place_(void) { } - #endif #endif diff --git a/examples/phaseField/srcMacros/fftc_fftw_in_place.cpp b/examples/phaseField/srcMacros/fftc_fftw_in_place.cpp index 50409ef8..92065f19 100644 --- a/examples/phaseField/srcMacros/fftc_fftw_in_place.cpp +++ b/examples/phaseField/srcMacros/fftc_fftw_in_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef IN_PLACE_FFT #ifdef HAVE_OPENMP @@ -9,55 +42,53 @@ #include #include -//#ifdef FFTW_OMP +// #ifdef FFTW_OMP #include -//#endif +// #endif // ---------------------------------------------------------------------- // FFTW -static void -fft_forward_fftw(double *data, int nn[3]) +static void fft_forward_fftw(double* data, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], - (fftw_complex *) data, (fftw_complex *) data, - FFTW_FORWARD, FFTW_ESTIMATE); + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], + (fftw_complex*) data, (fftw_complex*) data, + FFTW_FORWARD, FFTW_ESTIMATE); + } - } - - fftw_execute_dft(plan, (fftw_complex *) data, (fftw_complex *) data); + fftw_execute_dft(plan, (fftw_complex*) data, (fftw_complex*) data); } -static void -fft_backward_fftw(double *data, int nn[3]) +static void fft_backward_fftw(double* data, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], - (fftw_complex *) data, (fftw_complex *) data, - FFTW_BACKWARD, FFTW_ESTIMATE); - } + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_3d(nn[2], nn[1], nn[0], + (fftw_complex*) data, (fftw_complex*) data, + FFTW_BACKWARD, FFTW_ESTIMATE); + } - fftw_execute_dft(plan, (fftw_complex *) data, (fftw_complex *) data); + fftw_execute_dft(plan, (fftw_complex*) data, (fftw_complex*) data); } -void fftc_fftw_in_place_(double data[], int nn[], int *ndim, int *isign) +void fftc_fftw_in_place_(double data[], int nn[], int* ndim, int* isign) { - if (*isign == -1) { - fft_forward_fftw(data, nn); - } else { - fft_backward_fftw(data, nn); - } + if (*isign == -1) { + fft_forward_fftw(data, nn); + } + else { + fft_backward_fftw(data, nn); + } } void fftc_fftw_init_in_place_(void) { -//#ifdef FFTW_OMP - fftw_init_threads(); - fftw_plan_with_nthreads(omp_get_max_threads()); -//#endif +// #ifdef FFTW_OMP + fftw_init_threads(); + fftw_plan_with_nthreads(omp_get_max_threads()); +// #endif } #endif diff --git a/examples/phaseField/srcMacros/fftc_fftw_out_of_place.cpp b/examples/phaseField/srcMacros/fftc_fftw_out_of_place.cpp index d37cedab..6efb4f5a 100644 --- a/examples/phaseField/srcMacros/fftc_fftw_out_of_place.cpp +++ b/examples/phaseField/srcMacros/fftc_fftw_out_of_place.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifdef OUT_OF_PLACE_FFT #ifdef HAVE_OPENMP @@ -9,55 +42,53 @@ #include #include -//#ifdef FFTW_OMP +// #ifdef FFTW_OMP #include -//#endif +// #endif // ---------------------------------------------------------------------- // FFTW -static void -fft_forward_fftw(double *input, double *output, int nn[3]) +static void fft_forward_fftw(double* input, double* output, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_r2c_3d(nn[0], nn[1], nn[2], - (double *) input, (fftw_complex *) output, + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_r2c_3d(nn[0], nn[1], nn[2], + (double*) input, (fftw_complex*) output, FFTW_ESTIMATE); + } - } - - fftw_execute_dft_r2c(plan, (double *) input, (fftw_complex *) output); + fftw_execute_dft_r2c(plan, (double*) input, (fftw_complex*) output); } -static void -fft_backward_fftw(double *input, double *output, int nn[3]) +static void fft_backward_fftw(double* input, double* output, int nn[3]) { - static fftw_plan plan; - if (!plan) { - plan = fftw_plan_dft_c2r_3d(nn[0], nn[1], nn[2], - (fftw_complex *) input, (double *) output, - FFTW_ESTIMATE); - } + static fftw_plan plan; + if (!plan) { + plan = fftw_plan_dft_c2r_3d(nn[0], nn[1], nn[2], + (fftw_complex*) input, (double*) output, + FFTW_ESTIMATE); + } - fftw_execute_dft_c2r(plan, (fftw_complex *) input, (double *) output); + fftw_execute_dft_c2r(plan, (fftw_complex*) input, (double*) output); } -void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int *ndim, int *isign) +void fftc_fftw_out_of_place_(double input[], double output[], int nn[], int* ndim, int* isign) { - if (*isign == -1) { - fft_forward_fftw(input, output, nn); - } else { - fft_backward_fftw(input, output, nn); - } + if (*isign == -1) { + fft_forward_fftw(input, output, nn); + } + else { + fft_backward_fftw(input, output, nn); + } } void fftc_fftw_init_out_of_place_(void) { -//#ifdef FFTW_OMP - fftw_init_threads(); - fftw_plan_with_nthreads(omp_get_max_threads()); -//#endif +// #ifdef FFTW_OMP + fftw_init_threads(); + fftw_plan_with_nthreads(omp_get_max_threads()); +// #endif } #endif diff --git a/examples/phaseField/srcMacros/fourier_space.cpp b/examples/phaseField/srcMacros/fourier_space.cpp index 57711f87..3fd7559e 100644 --- a/examples/phaseField/srcMacros/fourier_space.cpp +++ b/examples/phaseField/srcMacros/fourier_space.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "fourier_space.h" @@ -20,70 +53,66 @@ FourierSpace::FourierSpace(int* nn, double* delta) #ifdef IN_PLACE_FFT kz_ = CArrayKokkos(nz_); #elif OUT_OF_PLACE_FFT - nz21_ = nz_/2 + 1; - kz_ = CArrayKokkos(nz21_); + nz21_ = nz_ / 2 + 1; + kz_ = CArrayKokkos(nz21_); #endif - // set values of kx_, ky_, and kz_ set_kx_ky_kz_(); - } - void FourierSpace::set_kx_ky_kz_() { // calculate kx_ FOR_ALL_CLASS(i, 0, nx_, { - int ti; - ti = i; - if (ti > nx_/2) ti = ti - nx_; - kx_(i) = (float(ti) * twopi_) / (nx_ * dx_); + int ti; + ti = i; + if (ti > nx_ / 2) { + ti = ti - nx_; + } + kx_(i) = (float(ti) * twopi_) / (nx_ * dx_); }); - // calculate ky_ FOR_ALL_CLASS(j, 0, ny_, { - int tj; - tj = j; - if (tj > ny_/2) tj = tj - ny_; - ky_(j) = (float(tj) * twopi_) / (ny_ * dy_); + int tj; + tj = j; + if (tj > ny_ / 2) { + tj = tj - ny_; + } + ky_(j) = (float(tj) * twopi_) / (ny_ * dy_); }); - // calculate kz_ for in-place-fft #ifdef IN_PLACE_FFT FOR_ALL_CLASS(k, 0, nz_, { - int tk; - tk = k; - if (tk > nz_/2) tk = tk - nz_; - kz_(k) = (float(tk) * twopi_) / (nz_ * dz_); + int tk; + tk = k; + if (tk > nz_ / 2) { + tk = tk - nz_; + } + kz_(k) = (float(tk) * twopi_) / (nz_ * dz_); }); #elif OUT_OF_PLACE_FFT FOR_ALL_CLASS(k, 0, nz21_, { - int tk; - tk = k; - kz_(k) = (float(tk) * twopi_) / (nz_ * dz_); - }); + int tk; + tk = k; + kz_(k) = (float(tk) * twopi_) / (nz_ * dz_); + }); #endif } - CArrayKokkos& FourierSpace::get_kx() { return kx_; } - CArrayKokkos& FourierSpace::get_ky() { return ky_; } - CArrayKokkos& FourierSpace::get_kz() { return kz_; } - - diff --git a/examples/phaseField/srcMacros/fourier_space.h b/examples/phaseField/srcMacros/fourier_space.h index a437c53a..6030fc6b 100644 --- a/examples/phaseField/srcMacros/fourier_space.h +++ b/examples/phaseField/srcMacros/fourier_space.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "matar.h" @@ -6,29 +39,29 @@ using namespace mtr; // matar namespace class FourierSpace { - private: - int* nn_; - int nx_; - int ny_; - int nz_; +private: + int* nn_; + int nx_; + int ny_; + int nz_; #ifdef OUT_OF_PLACE_FFT - int nz21_; + int nz21_; #endif - double* delta_; - double dx_; - double dy_; - double dz_; - const double pi_ = 3.141592653589793238463; - const double twopi_ = 2.0*pi_; - CArrayKokkos kx_; - CArrayKokkos ky_; - CArrayKokkos kz_; - - public: - FourierSpace(int* nn, double* delta); - CArrayKokkos& get_kx(); - CArrayKokkos& get_ky(); - CArrayKokkos& get_kz(); + double* delta_; + double dx_; + double dy_; + double dz_; + const double pi_ = 3.141592653589793238463; + const double twopi_ = 2.0 * pi_; + CArrayKokkos kx_; + CArrayKokkos ky_; + CArrayKokkos kz_; - void set_kx_ky_kz_(); +public: + FourierSpace(int* nn, double* delta); + CArrayKokkos& get_kx(); + CArrayKokkos& get_ky(); + CArrayKokkos& get_kz(); + + void set_kx_ky_kz_(); }; diff --git a/examples/phaseField/srcMacros/global_arrays.cpp b/examples/phaseField/srcMacros/global_arrays.cpp index 672d4d74..2029ebb9 100644 --- a/examples/phaseField/srcMacros/global_arrays.cpp +++ b/examples/phaseField/srcMacros/global_arrays.cpp @@ -1,7 +1,40 @@ -#include "global_arrays.h" - -GlobalArrays::GlobalArrays(int* nn) -{ - this->comp = DCArrayKokkos(nn[0], nn[1], nn[2]); - this->dfdc = CArrayKokkos(nn[0], nn[1], nn[2]); -} +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#include "global_arrays.h" + +GlobalArrays::GlobalArrays(int* nn) +{ + this->comp = DCArrayKokkos(nn[0], nn[1], nn[2]); + this->dfdc = CArrayKokkos(nn[0], nn[1], nn[2]); +} diff --git a/examples/phaseField/srcMacros/global_arrays.h b/examples/phaseField/srcMacros/global_arrays.h index 3ce7e3b6..41ba3833 100644 --- a/examples/phaseField/srcMacros/global_arrays.h +++ b/examples/phaseField/srcMacros/global_arrays.h @@ -1,12 +1,45 @@ -#pragma once -#include "matar.h" - -using namespace mtr; // matar namespace - -struct GlobalArrays -{ - DCArrayKokkos comp; - CArrayKokkos dfdc; - - GlobalArrays(int* nn); -}; +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once +#include "matar.h" + +using namespace mtr; // matar namespace + +struct GlobalArrays +{ + DCArrayKokkos comp; + CArrayKokkos dfdc; + + GlobalArrays(int* nn); +}; diff --git a/examples/phaseField/srcMacros/initialize_comp.cpp b/examples/phaseField/srcMacros/initialize_comp.cpp index 3a030cdb..dc3a6870 100644 --- a/examples/phaseField/srcMacros/initialize_comp.cpp +++ b/examples/phaseField/srcMacros/initialize_comp.cpp @@ -1,15 +1,47 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "initialize_comp.h" - -void initialize_comp(SimParameters& sp, DCArrayKokkos &comp) +void initialize_comp(SimParameters& sp, DCArrayKokkos& comp) { - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function - int nx = sp.nn[0]; - int ny = sp.nn[1]; - int nz = sp.nn[2]; - int iseed = sp.iseed; + int nx = sp.nn[0]; + int ny = sp.nn[1]; + int nz = sp.nn[2]; + int iseed = sp.iseed; double c0 = sp.c0; double noise = sp.noise; @@ -23,10 +55,10 @@ void initialize_comp(SimParameters& sp, DCArrayKokkos &comp) for (int j = 0; j < ny; ++j) { for (int k = 0; k < nz; ++k) { // random number between 0.0 and 1.0 - r = (double) rand()/RAND_MAX; + r = (double) rand() / RAND_MAX; // initialize "comp" with stochastic thermal fluctuations - comp.host(i,j,k) = c0 + (2.0*r - 1.0)*noise; + comp.host(i, j, k) = c0 + (2.0 * r - 1.0) * noise; } } } diff --git a/examples/phaseField/srcMacros/initialize_comp.h b/examples/phaseField/srcMacros/initialize_comp.h index f1660806..a308feb4 100644 --- a/examples/phaseField/srcMacros/initialize_comp.h +++ b/examples/phaseField/srcMacros/initialize_comp.h @@ -1,8 +1,41 @@ -#pragma once - -#include "sim_parameters.h" -#include "matar.h" - -using namespace mtr; // matar namespace - -void initialize_comp(SimParameters& sp, DCArrayKokkos &comp); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once + +#include "sim_parameters.h" +#include "matar.h" + +using namespace mtr; // matar namespace + +void initialize_comp(SimParameters& sp, DCArrayKokkos& comp); diff --git a/examples/phaseField/srcMacros/local_free_energy.cpp b/examples/phaseField/srcMacros/local_free_energy.cpp index 821a3ef3..a26ad265 100644 --- a/examples/phaseField/srcMacros/local_free_energy.cpp +++ b/examples/phaseField/srcMacros/local_free_energy.cpp @@ -1,53 +1,85 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include "local_free_energy.h" - -double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos &comp) +double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos& comp) { // this function calculates the total free energy of the system. - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; + int nx = nn[0]; + int ny = nn[1]; + int nz = nn[2]; double dx = delta[0]; double dy = delta[1]; double dz = delta[2]; - // + // double total_energy = 0.0; double loc_sum = 0.0; - REDUCE_SUM(i, 1, nx-1, - j, 1, ny-1, - k, 1, nz-1, + REDUCE_SUM(i, 1, nx - 1, + j, 1, ny - 1, + k, 1, nz - 1, loc_sum, { - // central difference spatial derivative of comp - double dcdx = (comp(i+1,j,k) - comp(i-1,j,k)) / (2.0 * dx); - double dcdy = (comp(i,j+1,k) - comp(i,j-1,k)) / (2.0 * dy); - double dcdz = (comp(i,j,k+1) - comp(i,j,k-1)) / (2.0 * dz); - loc_sum += comp(i,j,k) * comp(i,j,k) * (1.0 - comp(i,j,k)) * (1.0 - comp(i,j,k)) + // central difference spatial derivative of comp + double dcdx = (comp(i + 1, j, k) - comp(i - 1, j, k)) / (2.0 * dx); + double dcdy = (comp(i, j + 1, k) - comp(i, j - 1, k)) / (2.0 * dy); + double dcdz = (comp(i, j, k + 1) - comp(i, j, k - 1)) / (2.0 * dz); + loc_sum += comp(i, j, k) * comp(i, j, k) * (1.0 - comp(i, j, k)) * (1.0 - comp(i, j, k)) + 0.5 * kappa * (dcdx * dcdx + dcdy * dcdy + dcdz * dcdz); }, total_energy); return total_energy; } -void calculate_dfdc(int* nn, DCArrayKokkos &comp, CArrayKokkos &dfdc) +void calculate_dfdc(int* nn, DCArrayKokkos& comp, CArrayKokkos& dfdc) { - // this function calculates the derivitive of local free energy density (f) + // this function calculates the derivitive of local free energy density (f) // with respect to composition (c) (df/dc). - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; - - FOR_ALL(i, 0, nx, + + FOR_ALL(i, 0, nx, j, 0, ny, - k, 0, nz,{ - dfdc(i,j,k) = 4.0 * comp(i,j,k) * comp(i,j,k) * comp(i,j,k) - - 6.0 * comp(i,j,k) * comp(i,j,k) - + 2.0 * comp(i,j,k); + k, 0, nz, { + dfdc(i, j, k) = 4.0 * comp(i, j, k) * comp(i, j, k) * comp(i, j, k) + - 6.0 * comp(i, j, k) * comp(i, j, k) + + 2.0 * comp(i, j, k); }); } diff --git a/examples/phaseField/srcMacros/local_free_energy.h b/examples/phaseField/srcMacros/local_free_energy.h index e4d75420..acc96f34 100644 --- a/examples/phaseField/srcMacros/local_free_energy.h +++ b/examples/phaseField/srcMacros/local_free_energy.h @@ -1,8 +1,41 @@ -#pragma once -#include "matar.h" - -using namespace mtr; // matar namespace - -double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos &comp); - -void calculate_dfdc(int* nn, DCArrayKokkos &comp, CArrayKokkos &dfdc); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once +#include "matar.h" + +using namespace mtr; // matar namespace + +double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArrayKokkos& comp); + +void calculate_dfdc(int* nn, DCArrayKokkos& comp, CArrayKokkos& dfdc); diff --git a/examples/phaseField/srcMacros/main.cpp b/examples/phaseField/srcMacros/main.cpp index 2ddf4f9e..9f66f625 100644 --- a/examples/phaseField/srcMacros/main.cpp +++ b/examples/phaseField/srcMacros/main.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -9,55 +42,50 @@ #include "local_free_energy.h" #include "outputs.h" - int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); { + // simulation parameters + SimParameters sp; + sp.print(); - // simulation parameters - SimParameters sp; - sp.print(); - - // global arrays needed for simulation - GlobalArrays ga = GlobalArrays(sp.nn); - - // setup initial composition profile - initialize_comp(sp, ga.comp); + // global arrays needed for simulation + GlobalArrays ga = GlobalArrays(sp.nn); - // initialize solver - CHFourierSpectralSolver CH_fss(sp); + // setup initial composition profile + initialize_comp(sp, ga.comp); - // Start measuring time - auto begin = std::chrono::high_resolution_clock::now(); + // initialize solver + CHFourierSpectralSolver CH_fss(sp); - // time stepping loop - for (int iter = 1; iter <= sp.num_steps; iter++) { - // calculate df/dc - calculate_dfdc(sp.nn, ga.comp, ga.dfdc); + // Start measuring time + auto begin = std::chrono::high_resolution_clock::now(); - // Cahn Hilliard equation solver - CH_fss.time_march(ga.comp, ga.dfdc); + // time stepping loop + for (int iter = 1; iter <= sp.num_steps; iter++) { + // calculate df/dc + calculate_dfdc(sp.nn, ga.comp, ga.dfdc); - // report simulation progress and output vtk files - if (iter % sp.print_rate == 0) { + // Cahn Hilliard equation solver + CH_fss.time_march(ga.comp, ga.dfdc); - track_progress(iter, sp.nn, ga.comp); + // report simulation progress and output vtk files + if (iter % sp.print_rate == 0) { + track_progress(iter, sp.nn, ga.comp); - write_vtk(iter, sp.nn, sp.delta, ga.comp); + write_vtk(iter, sp.nn, sp.delta, ga.comp); - output_total_free_energy(iter, sp.print_rate, sp.num_steps, - sp.nn, sp.delta, sp.kappa, + output_total_free_energy(iter, sp.print_rate, sp.num_steps, + sp.nn, sp.delta, sp.kappa, ga.comp); + } } - } - - // Stop measuring time and calculate the elapsed time - auto end = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast(end - begin); - printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); - + // Stop measuring time and calculate the elapsed time + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - begin); + printf("Total time was %f seconds.\n", elapsed.count() * 1e-9); } Kokkos::finalize(); diff --git a/examples/phaseField/srcMacros/outputs.cpp b/examples/phaseField/srcMacros/outputs.cpp index 82c5a86b..a323f3cf 100644 --- a/examples/phaseField/srcMacros/outputs.cpp +++ b/examples/phaseField/srcMacros/outputs.cpp @@ -1,11 +1,44 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "outputs.h" #include "local_free_energy.h" -void track_progress(int iter, int* nn, DCArrayKokkos &comp) +void track_progress(int iter, int* nn, DCArrayKokkos& comp) { - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function int nx = nn[0]; int ny = nn[1]; @@ -13,15 +46,14 @@ void track_progress(int iter, int* nn, DCArrayKokkos &comp) // sum of comp field double sum_comp = 0.0; - double loc_sum = 0.0; + double loc_sum = 0.0; REDUCE_SUM(i, 0, nx, j, 0, ny, k, 0, nz, loc_sum, { - loc_sum += comp(i,j,k); + loc_sum += comp(i, j, k); }, sum_comp); - // max of comp field double max_comp; double loc_max; @@ -29,22 +61,22 @@ void track_progress(int iter, int* nn, DCArrayKokkos &comp) j, 0, ny, k, 0, nz, loc_max, { - if(loc_max < comp(i,j,k)){ - loc_max = comp(i,j,k); - } + if (loc_max < comp(i, j, k)) { + loc_max = comp(i, j, k); + } }, max_comp); - // min of comp field double min_comp; double loc_min; REDUCE_MIN(i, 0, nx, j, 0, ny, k, 0, nz, - loc_min, { - if(loc_min > comp(i,j,k)){ - loc_min = comp(i,j,k); } + loc_min, { + if (loc_min > comp(i, j, k)) { + loc_min = comp(i, j, k); + } }, min_comp); @@ -53,18 +85,15 @@ void track_progress(int iter, int* nn, DCArrayKokkos &comp) printf("Conservation of comp : %f\n", sum_comp); printf("Max comp : %f\n", max_comp); printf("Min comp : %f\n", min_comp); - } - - -void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) +void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos& comp) { - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; + int nx = nn[0]; + int ny = nn[1]; + int nz = nn[2]; double dx = delta[0]; double dy = delta[1]; double dz = delta[2]; @@ -74,7 +103,7 @@ void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) // output file management FILE* output_file; - char filename[50]; + char filename[50]; // create name of output vtk file sprintf(filename, "outputComp_%d.vtk", iter); @@ -88,17 +117,17 @@ void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) fprintf(output_file, "%s\n", "ASCII"); fprintf(output_file, "%s\n", "DATASET STRUCTURED_GRID"); fprintf(output_file, "%s %i %i %i\n", "DIMENSIONS", nx, ny, nz); - fprintf(output_file, "%s %i %s\n", "POINTS", nx*ny*nz, "double"); + fprintf(output_file, "%s %i %s\n", "POINTS", nx * ny * nz, "double"); - // write grid point values + // write grid point values // Note: order of for loop is important (k,j,i) double x, y, z; for (int k = 0; k < nz; ++k) { for (int j = 0; j < ny; ++j) { - for (int i = 0; i < nx; ++i) { - x = double(i)*dx; - y = double(j)*dy; - z = double(k)*dz; + for (int i = 0; i < nx; ++i) { + x = double(i) * dx; + y = double(j) * dy; + z = double(k) * dz; fprintf(output_file, " %12.6E %12.6E %12.6E\n", x, y, z); } } @@ -106,36 +135,33 @@ void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp) // write data values // Note: order of for loop is important (k,j,i) - fprintf(output_file, "%s %i\n", "POINT_DATA", nx*ny*nz); + fprintf(output_file, "%s %i\n", "POINT_DATA", nx * ny * nz); fprintf(output_file, "%s\n", "SCALARS data double"); fprintf(output_file, "%s\n", "LOOKUP_TABLE default"); for (int k = 0; k < nz; ++k) { for (int j = 0; j < ny; ++j) { - for (int i = 0; i < nx; ++i) { - fprintf(output_file, " %12.6E\n", comp.host(i,j,k)); + for (int i = 0; i < nx; ++i) { + fprintf(output_file, " %12.6E\n", comp.host(i, j, k)); } } } - + // close file fclose(output_file); } - - -void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, - double* delta, double kappa, DCArrayKokkos &comp) +void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, + double* delta, double kappa, DCArrayKokkos& comp) { // get total_free_energy double total_free_energy = calculate_total_free_energy(nn, delta, kappa, comp); // output file management static FILE* output_file; - static char filename[50]; + static char filename[50]; // open output vtk file - if (iter == print_rate) - { + if (iter == print_rate) { // create name of output vtk file sprintf(filename, "total_free_energy.csv"); output_file = fopen(filename, "w"); @@ -145,8 +171,7 @@ void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, fprintf(output_file, "%i,%12.6E\n", iter, total_free_energy); // close file - if (iter == num_steps) - { + if (iter == num_steps) { fclose(output_file); } } diff --git a/examples/phaseField/srcMacros/outputs.h b/examples/phaseField/srcMacros/outputs.h index b1941f91..6ea7f06f 100644 --- a/examples/phaseField/srcMacros/outputs.h +++ b/examples/phaseField/srcMacros/outputs.h @@ -1,15 +1,48 @@ -#pragma once - -#include "matar.h" - -using namespace mtr; // matar namespace - -// function to output simulation progress -void track_progress(int iter, int* nn, DCArrayKokkos &comp); - -// function to write vtk files for visualization -void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos &comp); - -// function to write total_free_energy to file -void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, - double* delta, double kappa, DCArrayKokkos &comp); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once + +#include "matar.h" + +using namespace mtr; // matar namespace + +// function to output simulation progress +void track_progress(int iter, int* nn, DCArrayKokkos& comp); + +// function to write vtk files for visualization +void write_vtk(int iter, int* nn, double* delta, DCArrayKokkos& comp); + +// function to write total_free_energy to file +void output_total_free_energy(int iter, int print_rate, int num_steps, int* nn, + double* delta, double kappa, DCArrayKokkos& comp); diff --git a/examples/phaseField/srcMacros/sim_parameters.cpp b/examples/phaseField/srcMacros/sim_parameters.cpp index 5683167b..1dcbf6cd 100644 --- a/examples/phaseField/srcMacros/sim_parameters.cpp +++ b/examples/phaseField/srcMacros/sim_parameters.cpp @@ -1,56 +1,88 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include "sim_parameters.h" - SimParameters::SimParameters() { // set default simulation parameters - this->nn[0] = 32; // nx - this->nn[1] = 32; // ny - this->nn[2] = 32; // nz - this->delta[0] = 1.0; // dx - this->delta[1] = 1.0; // dy - this->delta[2] = 1.0; // dz - this->dt = 5.0E-2; // dt + this->nn[0] = 32; // nx + this->nn[1] = 32; // ny + this->nn[2] = 32; // nz + this->delta[0] = 1.0; // dx + this->delta[1] = 1.0; // dy + this->delta[2] = 1.0; // dz + this->dt = 5.0E-2; // dt this->num_steps = 1000; // total number of time steps this->print_rate = 100; // time step interval for output file - this->iseed = 456; // random number seed - this->kappa = 1.0; // gradient energy coefficient - this->M = 1.0; // mobility - this->c0 = 5.0E-1; // critical composition - this->noise = 5.0E-3; // noise term for thermal fluctuations + this->iseed = 456; // random number seed + this->kappa = 1.0; // gradient energy coefficient + this->M = 1.0; // mobility + this->c0 = 5.0E-1; // critical composition + this->noise = 5.0E-3; // noise term for thermal fluctuations // set number of dimensions set_ndim(); } - void SimParameters::set_ndim() { ndim = 0; for (int i = 0; i < 3; i++) { - if (nn[i] > 1) ++ndim; + if (nn[i] > 1) { + ++ndim; + } } } - void SimParameters::print() { - std::cout << " nx = " << nn[0] << std::endl; - std::cout << " ny = " << nn[1] << std::endl; - std::cout << " nz = " << nn[2] << std::endl; - std::cout << " dx = " << delta[0] << std::endl; - std::cout << " dy = " << delta[1] << std::endl; - std::cout << " dz = " << delta[2] << std::endl; - std::cout << " dt = " << dt << std::endl; - std::cout << " num_steps = " << num_steps << std::endl; - std::cout << " print_rate = " << print_rate << std::endl; - std::cout << " iseed = " << iseed << std::endl; - std::cout << " kappa = " << kappa << std::endl; - std::cout << " M = " << M << std::endl; - std::cout << " c0 = " << c0 << std::endl; - std::cout << " noise = " << noise << std::endl; + std::cout << " nx = " << nn[0] << std::endl; + std::cout << " ny = " << nn[1] << std::endl; + std::cout << " nz = " << nn[2] << std::endl; + std::cout << " dx = " << delta[0] << std::endl; + std::cout << " dy = " << delta[1] << std::endl; + std::cout << " dz = " << delta[2] << std::endl; + std::cout << " dt = " << dt << std::endl; + std::cout << " num_steps = " << num_steps << std::endl; + std::cout << " print_rate = " << print_rate << std::endl; + std::cout << " iseed = " << iseed << std::endl; + std::cout << " kappa = " << kappa << std::endl; + std::cout << " M = " << M << std::endl; + std::cout << " c0 = " << c0 << std::endl; + std::cout << " noise = " << noise << std::endl; } diff --git a/examples/phaseField/srcMacros/sim_parameters.h b/examples/phaseField/srcMacros/sim_parameters.h index 9c694bf8..a89e4e23 100644 --- a/examples/phaseField/srcMacros/sim_parameters.h +++ b/examples/phaseField/srcMacros/sim_parameters.h @@ -1,21 +1,54 @@ -#pragma once - -struct SimParameters -{ - int nn[3]; - int ndim; - int num_steps; - int print_rate; - int iseed; - double dx; - double delta[3]; - double dt; - double kappa; - double M; - double c0; - double noise; - - SimParameters(); - void set_ndim(); - void print(); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once + +struct SimParameters +{ + int nn[3]; + int ndim; + int num_steps; + int print_rate; + int iseed; + double dx; + double delta[3]; + double dt; + double kappa; + double M; + double c0; + double noise; + + SimParameters(); + void set_ndim(); + void print(); }; \ No newline at end of file diff --git a/examples/phaseFieldMPI/complex_arrays.cpp b/examples/phaseFieldMPI/complex_arrays.cpp index 10137f64..5b73e233 100644 --- a/examples/phaseFieldMPI/complex_arrays.cpp +++ b/examples/phaseFieldMPI/complex_arrays.cpp @@ -1,14 +1,47 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include "complex_arrays.h" #include "mpi.h" -ComplexArrays::ComplexArrays(const SimParameters & sp, const std::array & loc_nn_img, const std::array & loc_start_index) : -comp_img(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0], 2), -dfdc_img(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0], 2), -kpow2(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0]), -denominator(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0]), -fs(sp.nn, loc_nn_img, loc_start_index, sp.delta) +ComplexArrays::ComplexArrays(const SimParameters& sp, const std::array& loc_nn_img, const std::array& loc_start_index) : + comp_img(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0], 2), + dfdc_img(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0], 2), + kpow2(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0]), + denominator(loc_nn_img[2], loc_nn_img[1], loc_nn_img[0]), + fs(sp.nn, loc_nn_img, loc_start_index, sp.delta) { // set values of kpow2 set_kpow2(); @@ -17,30 +50,28 @@ fs(sp.nn, loc_nn_img, loc_start_index, sp.delta) set_denominator(sp); } - void ComplexArrays::set_kpow2() -{ +{ // calculate kpow2 FOR_ALL_CLASS(k, 0, kpow2.dims(0), j, 0, kpow2.dims(1), i, 0, kpow2.dims(2), { - kpow2(k,j,i) = fs.kx(i) * fs.kx(i) - + fs.ky(j) * fs.ky(j) - + fs.kz(k) * fs.kz(k); + kpow2(k, j, i) = fs.kx(i) * fs.kx(i) + + fs.ky(j) * fs.ky(j) + + fs.kz(k) * fs.kz(k); }); } - -void ComplexArrays::set_denominator(const SimParameters & sp) +void ComplexArrays::set_denominator(const SimParameters& sp) { - double dt = sp.dt; - double M = sp.M; + double dt = sp.dt; + double M = sp.M; double kappa = sp.kappa; // calculate denominator_ FOR_ALL_CLASS(k, 0, denominator.dims(0), j, 0, denominator.dims(1), i, 0, denominator.dims(2), { - denominator(k,j,i) = 1.0 + (dt * M * kappa * kpow2(k,j,i) * kpow2(k,j,i)); + denominator(k, j, i) = 1.0 + (dt * M * kappa * kpow2(k, j, i) * kpow2(k, j, i)); }); } diff --git a/examples/phaseFieldMPI/complex_arrays.h b/examples/phaseFieldMPI/complex_arrays.h index 46d7a8f6..530e9f1a 100644 --- a/examples/phaseFieldMPI/complex_arrays.h +++ b/examples/phaseFieldMPI/complex_arrays.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "sim_parameters.h" @@ -10,14 +43,14 @@ using namespace mtr; // matar namespace class ComplexArrays { public: -// arrays needed by solver -DCArrayKokkos comp_img; -DCArrayKokkos dfdc_img; -DCArrayKokkos kpow2; -CArrayKokkos denominator; -FourierSpace fs; +// arrays needed by solver + DCArrayKokkos comp_img; + DCArrayKokkos dfdc_img; + DCArrayKokkos kpow2; + CArrayKokkos denominator; + FourierSpace fs; -ComplexArrays(const SimParameters & sp, const std::array & nn_img, const std::array & start_index); -void set_kpow2(); -void set_denominator(const SimParameters & sp); + ComplexArrays(const SimParameters& sp, const std::array& nn_img, const std::array& start_index); + void set_kpow2(); + void set_denominator(const SimParameters& sp); }; diff --git a/examples/phaseFieldMPI/fourier_space.cpp b/examples/phaseFieldMPI/fourier_space.cpp index c6aa013b..42ee50ea 100644 --- a/examples/phaseFieldMPI/fourier_space.cpp +++ b/examples/phaseFieldMPI/fourier_space.cpp @@ -1,57 +1,95 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "fourier_space.h" #include "mpi.h" -FourierSpace::FourierSpace(const std::array & glob_nn_real, - const std::array & loc_nn_cmplx, - const std::array & loc_start_index, - const std::array & delta) : -kx(loc_nn_cmplx[0]), -ky(loc_nn_cmplx[1]), -kz(loc_nn_cmplx[2]) +FourierSpace::FourierSpace(const std::array& glob_nn_real, + const std::array& loc_nn_cmplx, + const std::array& loc_start_index, + const std::array& delta) : + kx(loc_nn_cmplx[0]), + ky(loc_nn_cmplx[1]), + kz(loc_nn_cmplx[2]) { // set values of kx, ky, and kz set_kx_ky_kz(glob_nn_real, loc_nn_cmplx, loc_start_index, delta); } - -void FourierSpace::set_kx_ky_kz(const std::array & glob_nn_real, - const std::array & loc_nn_cmplx, - const std::array & loc_start_index, - const std::array & delta) +void FourierSpace::set_kx_ky_kz(const std::array& glob_nn_real, + const std::array& loc_nn_cmplx, + const std::array& loc_start_index, + const std::array& delta) { int nx = glob_nn_real[0]; int ny = glob_nn_real[1]; int nz = glob_nn_real[2]; - double dx = delta[0]; - double dy = delta[1]; - double dz = delta[2]; int xstart = loc_start_index[0]; int ystart = loc_start_index[1]; int zstart = loc_start_index[2]; + double dx = delta[0]; + double dy = delta[1]; + double dz = delta[2]; // calculate kx FOR_ALL_CLASS(i, 0, kx.dims(0), { - int ti; - ti = i + xstart; - if (ti > nx/2) ti = ti - nx; - kx(i) = (double(ti) * twopi) / (double(nx) * dx); + int ti; + ti = i + xstart; + if (ti > nx / 2) { + ti = ti - nx; + } + kx(i) = (double(ti) * twopi) / (double(nx) * dx); }); // calculate ky FOR_ALL_CLASS(j, 0, ky.dims(0), { - int tj; - tj = j + ystart; - if (tj > ny/2) tj = tj - ny; - ky(j) = (double(tj) * twopi) / (double(ny) * dy); + int tj; + tj = j + ystart; + if (tj > ny / 2) { + tj = tj - ny; + } + ky(j) = (double(tj) * twopi) / (double(ny) * dy); }); // calculate kz FOR_ALL_CLASS(k, 0, kz.dims(0), { - int tk; - tk = k + zstart; - if (tk > nz/2) tk = tk - nz; - kz(k) = (double(tk) * twopi) / (double(nz) * dz); + int tk; + tk = k + zstart; + if (tk > nz / 2) { + tk = tk - nz; + } + kz(k) = (double(tk) * twopi) / (double(nz) * dz); }); } diff --git a/examples/phaseFieldMPI/fourier_space.h b/examples/phaseFieldMPI/fourier_space.h index fe55d35e..80ac6f61 100644 --- a/examples/phaseFieldMPI/fourier_space.h +++ b/examples/phaseFieldMPI/fourier_space.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "matar.h" @@ -8,18 +41,18 @@ using namespace mtr; // matar namespace class FourierSpace { public: -const double pi = 3.141592653589793238463; -const double twopi = 2.0*pi; -CArrayKokkos kx; -CArrayKokkos ky; -CArrayKokkos kz; - -FourierSpace(const std::array & glob_nn_real, - const std::array & loc_nn_cmplx, - const std::array & loc_start_index, - const std::array & delta); -void set_kx_ky_kz(const std::array & glob_nn_real, - const std::array & loc_nn_cmplx, - const std::array & loc_start_index, - const std::array & delta); + const double pi = 3.141592653589793238463; + const double twopi = 2.0 * pi; + CArrayKokkos kx; + CArrayKokkos ky; + CArrayKokkos kz; + + FourierSpace(const std::array& glob_nn_real, + const std::array& loc_nn_cmplx, + const std::array& loc_start_index, + const std::array& delta); + void set_kx_ky_kz(const std::array& glob_nn_real, + const std::array& loc_nn_cmplx, + const std::array& loc_start_index, + const std::array& delta); }; diff --git a/examples/phaseFieldMPI/global_arrays.cpp b/examples/phaseFieldMPI/global_arrays.cpp index 2f1c3d5e..be851954 100644 --- a/examples/phaseFieldMPI/global_arrays.cpp +++ b/examples/phaseFieldMPI/global_arrays.cpp @@ -1,14 +1,47 @@ -#include "global_arrays.h" -#include "mpi.h" - -GlobalArrays::GlobalArrays(const std::array & nn_all, const std::array & nn) : -comp(nn[2], nn[1], nn[0]), -dfdc(nn[2], nn[1], nn[0]) -{ - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - if (0 == rank) { - this->comp_all = CArray(nn_all[2], nn_all[1], nn_all[0]); - } -} +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#include "global_arrays.h" +#include "mpi.h" + +GlobalArrays::GlobalArrays(const std::array& nn_all, const std::array& nn) : + comp(nn[2], nn[1], nn[0]), + dfdc(nn[2], nn[1], nn[0]) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (0 == rank) { + this->comp_all = CArray(nn_all[2], nn_all[1], nn_all[0]); + } +} diff --git a/examples/phaseFieldMPI/global_arrays.h b/examples/phaseFieldMPI/global_arrays.h index ee5721ee..5dca68a9 100644 --- a/examples/phaseFieldMPI/global_arrays.h +++ b/examples/phaseFieldMPI/global_arrays.h @@ -1,13 +1,46 @@ -#pragma once -#include "matar.h" - -using namespace mtr; // matar namespace - -struct GlobalArrays -{ - CArray comp_all; - DCArrayKokkos comp; - DCArrayKokkos dfdc; - - GlobalArrays(const std::array & nn_all, const std::array & nn); -}; +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once +#include "matar.h" + +using namespace mtr; // matar namespace + +struct GlobalArrays +{ + CArray comp_all; + DCArrayKokkos comp; + DCArrayKokkos dfdc; + + GlobalArrays(const std::array& nn_all, const std::array& nn); +}; diff --git a/examples/phaseFieldMPI/heffte_backends.h b/examples/phaseFieldMPI/heffte_backends.h index d69767e1..57c330df 100644 --- a/examples/phaseFieldMPI/heffte_backends.h +++ b/examples/phaseFieldMPI/heffte_backends.h @@ -1,13 +1,46 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "heffte.h" #ifdef USE_CUFFT - using heffte_backend = heffte::backend::cufft; +using heffte_backend = heffte::backend::cufft; #elif USE_ROCFFT - using heffte_backend = heffte::backend::rocfft; +using heffte_backend = heffte::backend::rocfft; #elif USE_FFTW - using heffte_backend = heffte::backend::fftw; +using heffte_backend = heffte::backend::fftw; #elif USE_MKL - using heffte_backend = heffte::backend::mkl; +using heffte_backend = heffte::backend::mkl; #endif diff --git a/examples/phaseFieldMPI/heffte_fft.cpp b/examples/phaseFieldMPI/heffte_fft.cpp index 91c8289f..1c922c74 100644 --- a/examples/phaseFieldMPI/heffte_fft.cpp +++ b/examples/phaseFieldMPI/heffte_fft.cpp @@ -1 +1,34 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include diff --git a/examples/phaseFieldMPI/heffte_fft.h b/examples/phaseFieldMPI/heffte_fft.h index 2c9ce87e..4d08cea7 100644 --- a/examples/phaseFieldMPI/heffte_fft.h +++ b/examples/phaseFieldMPI/heffte_fft.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include @@ -9,56 +42,56 @@ *************************************************** */ -template +template class FFTBase { public: - MPI_Comm comm; + MPI_Comm comm; const int root = 0; - int my_rank; - int num_ranks; - std::array globalRealBoxSize; - std::array globalComplexBoxSize; - heffte::box3d<> globalRealBox; - heffte::box3d<> globalComplexBox; + int my_rank; + int num_ranks; + std::array globalRealBoxSize; + std::array globalComplexBoxSize; + heffte::box3d<> globalRealBox; + heffte::box3d<> globalComplexBox; std::array procGrid; std::vector> localRealBoxes; std::vector> localComplexBoxes; - heffte::box3d<> myRealBox; - heffte::box3d<> myComplexBox; + heffte::box3d<> myRealBox; + heffte::box3d<> myComplexBox; heffte::plan_options options; - std::vector> localRealBoxSizes; - std::vector> localComplexBoxSizes; + std::vector> localRealBoxSizes; + std::vector> localComplexBoxSizes; MPI_Datatype mpiType; std::vector mpiSubarrayTypes; - FFTBase(MPI_Comm comm_, const std::array & globalRealBoxSize_, const std::array & globalComplexBoxSize_); + FFTBase(MPI_Comm comm_, const std::array& globalRealBoxSize_, const std::array& globalComplexBoxSize_); virtual ~FFTBase(); - virtual void forward(const R *input, std::complex *output) = 0; - virtual void forward(const R *input, R *output) = 0; - virtual void backward(const std::complex *input, R *output) = 0; - virtual void backward(const R *input, R *output) = 0; + virtual void forward(const R* input, std::complex* output) = 0; + virtual void forward(const R* input, R* output) = 0; + virtual void backward(const std::complex* input, R* output) = 0; + virtual void backward(const R* input, R* output) = 0; }; -template -FFTBase::FFTBase(MPI_Comm comm_, const std::array & globalRealBoxSize_, const std::array & globalComplexBoxSize_) : -comm(comm_), -my_rank(heffte::mpi::comm_rank(comm)), -num_ranks(heffte::mpi::comm_size(comm)), -globalRealBoxSize(globalRealBoxSize_), -globalComplexBoxSize(globalComplexBoxSize_), -globalRealBox({0, 0, 0}, {globalRealBoxSize[0]-1, globalRealBoxSize[1]-1, globalRealBoxSize[2]-1}), -globalComplexBox({0, 0, 0}, {globalComplexBoxSize[0]-1, globalComplexBoxSize[1]-1, globalComplexBoxSize[2]-1}), -procGrid(heffte::proc_setup_min_surface(globalRealBox, num_ranks)), -localRealBoxes(heffte::split_world(globalRealBox, procGrid)), -localComplexBoxes(heffte::split_world(globalComplexBox, procGrid)), -myRealBox(localRealBoxes[my_rank]), -myComplexBox(localComplexBoxes[my_rank]), -options(heffte::default_options()), -localRealBoxSizes(num_ranks), -localComplexBoxSizes(num_ranks), -mpiType(MPI_DATATYPE_NULL), -mpiSubarrayTypes(num_ranks, MPI_DATATYPE_NULL) +template +FFTBase::FFTBase(MPI_Comm comm_, const std::array& globalRealBoxSize_, const std::array& globalComplexBoxSize_) : + comm(comm_), + my_rank(heffte::mpi::comm_rank(comm)), + num_ranks(heffte::mpi::comm_size(comm)), + globalRealBoxSize(globalRealBoxSize_), + globalComplexBoxSize(globalComplexBoxSize_), + globalRealBox({ 0, 0, 0 }, { globalRealBoxSize[0] - 1, globalRealBoxSize[1] - 1, globalRealBoxSize[2] - 1 }), + globalComplexBox({ 0, 0, 0 }, { globalComplexBoxSize[0] - 1, globalComplexBoxSize[1] - 1, globalComplexBoxSize[2] - 1 }), + procGrid(heffte::proc_setup_min_surface(globalRealBox, num_ranks)), + localRealBoxes(heffte::split_world(globalRealBox, procGrid)), + localComplexBoxes(heffte::split_world(globalComplexBox, procGrid)), + myRealBox(localRealBoxes[my_rank]), + myComplexBox(localComplexBoxes[my_rank]), + options(heffte::default_options()), + localRealBoxSizes(num_ranks), + localComplexBoxSizes(num_ranks), + mpiType(MPI_DATATYPE_NULL), + mpiSubarrayTypes(num_ranks, MPI_DATATYPE_NULL) { // use strided 1-D FFT operations // some backends work just as well when the entries of the data are not contiguous @@ -69,7 +102,7 @@ mpiSubarrayTypes(num_ranks, MPI_DATATYPE_NULL) // collaborative all-to-all and individual point-to-point communications are two alternatives // one may be better than the other depending on // the version of MPI, the hardware interconnect, and the problem size - //options.use_alltoall = true; + // options.use_alltoall = true; // in the intermediate steps, the data can be shapes as either 2-D slabs or 1-D pencils // for sufficiently large problem, it is expected that the pencil decomposition is better @@ -77,39 +110,32 @@ mpiSubarrayTypes(num_ranks, MPI_DATATYPE_NULL) options.use_pencils = true; // calculate sizes of real and complex domains(boxes) for each rank - for (size_t i = 0; i < num_ranks; i++) - { - for (size_t j = 0; j < 3; j++) - { - localRealBoxSizes[i][j] = localRealBoxes[i].high[j] - localRealBoxes[i].low[j] + 1; - localComplexBoxSizes[i][j] = localComplexBoxes[i].high[j] - localComplexBoxes[i].low[j] + 1; + for (size_t i = 0; i < num_ranks; i++) { + for (size_t j = 0; j < 3; j++) { + localRealBoxSizes[i][j] = localRealBoxes[i].high[j] - localRealBoxes[i].low[j] + 1; + localComplexBoxSizes[i][j] = localComplexBoxes[i].high[j] - localComplexBoxes[i].low[j] + 1; } } // set correct mpiType dependent on typename R - if constexpr (std::is_same::value) - { + if constexpr (std::is_same::value) { mpiType = MPI_FLOAT; } - else if constexpr (std::is_same::value) - { + else if constexpr (std::is_same::value) { mpiType = MPI_DOUBLE; } - else if constexpr (std::is_same::value) - { + else if constexpr (std::is_same::value) { mpiType = MPI_LONG_DOUBLE; } - else - { + else{ throw std::runtime_error("mpiType can not be deduced in FFTBase. Please add the mpi type.\n"); } // create MPI subarray type for each rank - for (size_t i = 0; i < num_ranks; i++) - { + for (size_t i = 0; i < num_ranks; i++) { int dimensions_full_array[3] = { globalRealBoxSize[2], globalRealBoxSize[1], globalRealBoxSize[0] }; - int dimensions_subarray[3] = { localRealBoxSizes[i][2], localRealBoxSizes[i][1], localRealBoxSizes[i][0] }; - int start_coordinates[3] = { localRealBoxes[i].low[2], localRealBoxes[i].low[1], localRealBoxes[i].low[0] }; + int dimensions_subarray[3] = { localRealBoxSizes[i][2], localRealBoxSizes[i][1], localRealBoxSizes[i][0] }; + int start_coordinates[3] = { localRealBoxes[i].low[2], localRealBoxes[i].low[1], localRealBoxes[i].low[0] }; MPI_Type_create_subarray(3, dimensions_full_array, dimensions_subarray, start_coordinates, MPI_ORDER_C, mpiType, &mpiSubarrayTypes[i]); MPI_Type_commit(&mpiSubarrayTypes[i]); } @@ -122,11 +148,10 @@ mpiSubarrayTypes(num_ranks, MPI_DATATYPE_NULL) #endif } -template -FFTBase::~FFTBase() +template +FFTBase::~FFTBase() { - for (size_t i = 0; i < num_ranks; i++) - { + for (size_t i = 0; i < num_ranks; i++) { MPI_Type_free(&mpiSubarrayTypes[i]); } } @@ -135,59 +160,59 @@ FFTBase::~FFTBase() FFT3D_R2C *************************************************** */ -template -class FFT3D_R2C : public FFTBase +template +class FFT3D_R2C : public FFTBase { public: int r2c_direction; heffte::fft3d_r2c fft; // heffte class for performing the fft typename heffte::fft3d::template buffer_container> workspace; - FFT3D_R2C(MPI_Comm comm, const std::array & globalRealBoxSize); + FFT3D_R2C(MPI_Comm comm, const std::array& globalRealBoxSize); ~FFT3D_R2C(); - void forward(const R *input, std::complex *output) override; - void forward(const R *input, R *output) override; - void backward(const std::complex *input, R *output) override; - void backward(const R *input, R *output) override; + void forward(const R* input, std::complex* output) override; + void forward(const R* input, R* output) override; + void backward(const std::complex* input, R* output) override; + void backward(const R* input, R* output) override; }; -template -FFT3D_R2C::FFT3D_R2C(MPI_Comm comm, const std::array & globalRealBoxSize) : -FFTBase(comm, globalRealBoxSize, {globalRealBoxSize[0]/2+1, globalRealBoxSize[1], globalRealBoxSize[2]}), -r2c_direction(0), -fft(this->myRealBox, this->myComplexBox, r2c_direction, this->comm, this->options), -workspace(fft.size_workspace()) +template +FFT3D_R2C::FFT3D_R2C(MPI_Comm comm, const std::array& globalRealBoxSize) : + FFTBase(comm, globalRealBoxSize, { globalRealBoxSize[0] / 2 + 1, globalRealBoxSize[1], globalRealBoxSize[2] }), + r2c_direction(0), + fft(this->myRealBox, this->myComplexBox, r2c_direction, this->comm, this->options), + workspace(fft.size_workspace()) { // check if the complex indexes have correct dimension assert(this->globalRealBox.r2c(r2c_direction) == this->globalComplexBox); } -template -void FFT3D_R2C::forward(const R *input, std::complex *output) +template +void FFT3D_R2C::forward(const R* input, std::complex* output) { fft.forward(input, output, workspace.data()); } -template -void FFT3D_R2C::forward(const R *input, R *output) +template +void FFT3D_R2C::forward(const R* input, R* output) { fft.forward(input, (std::complex*)output, workspace.data()); } -template -void FFT3D_R2C::backward(const std::complex *input, R *output) +template +void FFT3D_R2C::backward(const std::complex* input, R* output) { fft.backward(input, output, workspace.data(), heffte::scale::full); } -template -void FFT3D_R2C::backward(const R *input, R *output) +template +void FFT3D_R2C::backward(const R* input, R* output) { fft.backward((std::complex*)input, output, workspace.data(), heffte::scale::full); } -template -FFT3D_R2C::~FFT3D_R2C() +template +FFT3D_R2C::~FFT3D_R2C() { } @@ -195,54 +220,54 @@ FFT3D_R2C::~FFT3D_R2C() FFT3D *************************************************** */ -template -class FFT3D : public FFTBase +template +class FFT3D : public FFTBase { public: heffte::fft3d fft; // heffte class for performing the fft typename heffte::fft3d::template buffer_container> workspace; - FFT3D(MPI_Comm comm, const std::array & globalRealBoxSize); + FFT3D(MPI_Comm comm, const std::array& globalRealBoxSize); ~FFT3D(); - void forward(const R *input, std::complex *output) override; - void forward(const R *input, R *output) override; - void backward(const std::complex *input, R *output) override; - void backward(const R *input, R *output) override; + void forward(const R* input, std::complex* output) override; + void forward(const R* input, R* output) override; + void backward(const std::complex* input, R* output) override; + void backward(const R* input, R* output) override; }; -template -FFT3D::FFT3D(MPI_Comm comm, const std::array & globalRealBoxSize) : -FFTBase(comm, globalRealBoxSize, globalRealBoxSize), -fft(this->myRealBox, this->myComplexBox, this->comm, this->options), -workspace(fft.size_workspace()) +template +FFT3D::FFT3D(MPI_Comm comm, const std::array& globalRealBoxSize) : + FFTBase(comm, globalRealBoxSize, globalRealBoxSize), + fft(this->myRealBox, this->myComplexBox, this->comm, this->options), + workspace(fft.size_workspace()) { } -template -void FFT3D::forward(const R *input, std::complex *output) +template +void FFT3D::forward(const R* input, std::complex* output) { fft.forward(input, output, workspace.data()); } -template -void FFT3D::forward(const R *input, R *output) +template +void FFT3D::forward(const R* input, R* output) { fft.forward(input, (std::complex*)output, workspace.data()); } -template -void FFT3D::backward(const std::complex *input, R *output) +template +void FFT3D::backward(const std::complex* input, R* output) { fft.backward(input, output, workspace.data(), heffte::scale::full); } -template -void FFT3D::backward(const R *input, R *output) +template +void FFT3D::backward(const R* input, R* output) { fft.backward((std::complex*)input, output, workspace.data(), heffte::scale::full); } -template -FFT3D::~FFT3D() +template +FFT3D::~FFT3D() { } diff --git a/examples/phaseFieldMPI/initialize_comp.cpp b/examples/phaseFieldMPI/initialize_comp.cpp index 0ebe51a3..0d114b87 100644 --- a/examples/phaseFieldMPI/initialize_comp.cpp +++ b/examples/phaseFieldMPI/initialize_comp.cpp @@ -1,29 +1,58 @@ -#include -#include "initialize_comp.h" - - -void initialize_comp(const SimParameters &sp, DCArrayKokkos &comp, CArray &comp_all) -{ -if (0 == rank) -{ - // seed random number generator - srand(sp.iseed); - - // to hold random number - double r; - - for (int i = 0; i < sp.nn[0]; ++i) { - for (int j = 0; j < sp.nn[1]; ++j) { - for (int k = 0; k < sp.nn[2]; ++k) { - // random number between 0.0 and 1.0 - r = (double) rand()/RAND_MAX; - - // initialize "comp" with stochastic thermal fluctuations - comp_all(i,j,k) = sp.c0 + (2.0*r - 1.0)*sp.noise; - } - } - } -} - - -} +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#include +#include "initialize_comp.h" + +void initialize_comp(const SimParameters& sp, DCArrayKokkos& comp, CArray& comp_all) +{ + if (0 == rank) { + // seed random number generator + srand(sp.iseed); + + // to hold random number + double r; + + for (int i = 0; i < sp.nn[0]; ++i) { + for (int j = 0; j < sp.nn[1]; ++j) { + for (int k = 0; k < sp.nn[2]; ++k) { + // random number between 0.0 and 1.0 + r = (double) rand() / RAND_MAX; + + // initialize "comp" with stochastic thermal fluctuations + comp_all(i, j, k) = sp.c0 + (2.0 * r - 1.0) * sp.noise; + } + } + } + } +} diff --git a/examples/phaseFieldMPI/initialize_comp.h b/examples/phaseFieldMPI/initialize_comp.h index dc34c15b..88b58f34 100644 --- a/examples/phaseFieldMPI/initialize_comp.h +++ b/examples/phaseFieldMPI/initialize_comp.h @@ -1,8 +1,41 @@ -#pragma once - -#include "sim_parameters.h" -#include "matar.h" - -using namespace mtr; // matar namespace - -void initialize_comp(const SimParameters &sp, DCArrayKokkos &comp, CArray &comp_all); +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once + +#include "sim_parameters.h" +#include "matar.h" + +using namespace mtr; // matar namespace + +void initialize_comp(const SimParameters& sp, DCArrayKokkos& comp, CArray& comp_all); diff --git a/examples/phaseFieldMPI/main.cpp b/examples/phaseFieldMPI/main.cpp index 9caa8426..88dade9d 100644 --- a/examples/phaseFieldMPI/main.cpp +++ b/examples/phaseFieldMPI/main.cpp @@ -1,4 +1,3 @@ - /********************************************************************************************** © 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos @@ -12,14 +11,11 @@ This program is open source under the BSD-3 License. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -36,11 +32,9 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************************************/ - - #include "system.h" -void parse_command_line(int argc, char *argv[], SimParameters & sp); +void parse_command_line(int argc, char* argv[], SimParameters& sp); int main(int argc, char* argv[]) { @@ -48,7 +42,6 @@ int main(int argc, char* argv[]) Kokkos::initialize(argc, argv); { // kokkos scope - // simulation parameters SimParameters sp; @@ -57,7 +50,6 @@ int main(int argc, char* argv[]) // Simulation system System sys(MPI_COMM_WORLD, sp); sys.solve(); - } // kokkos end scope Kokkos::finalize(); @@ -66,23 +58,26 @@ int main(int argc, char* argv[]) return 0; } -void parse_command_line(int argc, char *argv[], SimParameters & sp) +void parse_command_line(int argc, char* argv[], SimParameters& sp) { - std::string opt; - int i = 1; - while (i < argc && argv[i][0] == '-') - { - opt = std::string(argv[i]); - - if(opt == "-nx") - sp.nn[0] = atoi(argv[++i]); - - if(opt == "-ny") - sp.nn[1] = atoi(argv[++i]); - - if(opt == "-nz") - sp.nn[2] = atoi(argv[++i]); - - ++i; - } + std::string opt; + int i = 1; + while (i < argc && argv[i][0] == '-') + { + opt = std::string(argv[i]); + + if (opt == "-nx") { + sp.nn[0] = atoi(argv[++i]); + } + + if (opt == "-ny") { + sp.nn[1] = atoi(argv[++i]); + } + + if (opt == "-nz") { + sp.nn[2] = atoi(argv[++i]); + } + + ++i; + } } diff --git a/examples/phaseFieldMPI/profile.cpp b/examples/phaseFieldMPI/profile.cpp old mode 100755 new mode 100644 index 3769289e..4cadc6ac --- a/examples/phaseFieldMPI/profile.cpp +++ b/examples/phaseFieldMPI/profile.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include "profile.h" #include @@ -6,11 +39,11 @@ // Event::Event(const std::string& name) { - name_ = name; - count_ = 0; - //total_time_ = duration_t::duration::zero(); - clock_t::time_point t1 = clock_t::now(); - total_time_ = t1 - t1; + name_ = name; + count_ = 0; + // total_time_ = duration_t::duration::zero(); + clock_t::time_point t1 = clock_t::now(); + total_time_ = t1 - t1; } void Event::start() @@ -26,9 +59,9 @@ void Event::stop() double Event::get_time_in_seconds() { - return std::chrono::duration_cast - - (total_time_).count() * 1.0e-9; + return std::chrono::duration_cast + + (total_time_).count() * 1.0e-9; } int Event::get_count() @@ -41,12 +74,11 @@ std::string& Event::get_name() return name_; } - // // Declaration of Profile class // Event Profile::total = Event("total"); -Event Profile::fft_forward = Event("fft_forward"); +Event Profile::fft_forward = Event("fft_forward"); Event Profile::fft_backward = Event("fft_backward"); // @@ -56,60 +88,60 @@ Profile::Profile() { } -void Profile::start(Event & event) +void Profile::start(Event& event) { - event.start(); + event.start(); } -void Profile::stop(Event & event) +void Profile::stop(Event& event) { - if (event.get_count() == 0){ - events_.push_back(&event); - } + if (event.get_count() == 0) { + events_.push_back(&event); + } - event.stop(); + event.stop(); } -void Profile::start_barrier(Event & event) +void Profile::start_barrier(Event& event) { - Kokkos::fence(); + Kokkos::fence(); #ifdef HAVE_CUDA - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); #elif HAVE_OPENMP #pragma omp barrier #endif - event.start(); + event.start(); } -void Profile::stop_barrier(Event & event) +void Profile::stop_barrier(Event& event) { - if (event.get_count() == 0){ - events_.push_back(&event); - } + if (event.get_count() == 0) { + events_.push_back(&event); + } - Kokkos::fence(); + Kokkos::fence(); #ifdef HAVE_CUDA - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); #elif HAVE_OPENMP #pragma omp barrier #endif - event.stop(); + event.stop(); } -void Profile::print_one(Event &event) +void Profile::print_one(Event& event) { - //printf("%s : %12.4E seconds\n", event.get_name().c_str(), event.get_time_in_seconds()); - printf("\n"); - printf("%s:\n", event.get_name().c_str()); - printf(" time: %12.4E seconds", event.get_time_in_seconds()); - printf(" count: %d", event.get_count()); - printf(" fraction: %12.4E%%", event.get_time_in_seconds()/total.get_time_in_seconds()*100.0); - printf("\n"); + // printf("%s : %12.4E seconds\n", event.get_name().c_str(), event.get_time_in_seconds()); + printf("\n"); + printf("%s:\n", event.get_name().c_str()); + printf(" time: %12.4E seconds", event.get_time_in_seconds()); + printf(" count: %d", event.get_count()); + printf(" fraction: %12.4E%%", event.get_time_in_seconds() / total.get_time_in_seconds() * 100.0); + printf("\n"); } void Profile::print() { - for (Event* event : events_) { - print_one(*event); - } + for (Event* event : events_) { + print_one(*event); + } } diff --git a/examples/phaseFieldMPI/profile.h b/examples/phaseFieldMPI/profile.h old mode 100755 new mode 100644 index a1998a77..b6d8f66f --- a/examples/phaseFieldMPI/profile.h +++ b/examples/phaseFieldMPI/profile.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include @@ -5,55 +38,50 @@ #include #include - // Defination of Event class class Event { +// using chrono_t = std::chrono; +using clock_t = std::chrono::high_resolution_clock; +using duration_t = clock_t::duration; - //using chrono_t = std::chrono; - using clock_t = std::chrono::high_resolution_clock; - using duration_t = clock_t::duration; +private: + clock_t::time_point start_time_; + duration_t total_time_; // total time + int count_; // current call count + std::string name_; // name of event - private: - clock_t::time_point start_time_; - duration_t total_time_; // total time - int count_; // current call count - std::string name_; // name of event - - public: +public: Event(const std::string& name); void start(); void stop(); double get_time_in_seconds(); int get_count(); std::string& get_name(); - }; - // Defination of Profile class class Profile { - - //----------------------------------------------- - // To use the profile class for another projects, - // change the events. Remember to define the Events - // in profile.cpp file since they are static members. - public: +// ----------------------------------------------- +// To use the profile class for another projects, +// change the events. Remember to define the Events +// in profile.cpp file since they are static members. +public: static Event total; static Event fft_forward; static Event fft_backward; - //----------------------------------------------- +// ----------------------------------------------- - private: +private: static std::vector events_; - public: +public: Profile(); - static void start(Event &event); - static void stop(Event &event); - static void start_barrier(Event &event); - static void stop_barrier(Event &event); - static void print_one(Event &event); + static void start(Event& event); + static void stop(Event& event); + static void start_barrier(Event& event); + static void stop_barrier(Event& event); + static void print_one(Event& event); static void print(); }; diff --git a/examples/phaseFieldMPI/sim_parameters.cpp b/examples/phaseFieldMPI/sim_parameters.cpp index e9c049ce..0cb9494a 100644 --- a/examples/phaseFieldMPI/sim_parameters.cpp +++ b/examples/phaseFieldMPI/sim_parameters.cpp @@ -1,56 +1,88 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include "sim_parameters.h" - SimParameters::SimParameters() { // set default simulation parameters - this->nn[0] = 32; // nx - this->nn[1] = 32; // ny - this->nn[2] = 32; // nz - this->delta[0] = 1.0; // dx - this->delta[1] = 1.0; // dy - this->delta[2] = 1.0; // dz - this->dt = 5.0E-2; // dt + this->nn[0] = 32; // nx + this->nn[1] = 32; // ny + this->nn[2] = 32; // nz + this->delta[0] = 1.0; // dx + this->delta[1] = 1.0; // dy + this->delta[2] = 1.0; // dz + this->dt = 5.0E-2; // dt this->num_steps = 1000; // total number of time steps this->print_rate = 100; // time step interval for output file - this->iseed = 456; // random number seed - this->kappa = 1.0; // gradient energy coefficient - this->M = 1.0; // mobility - this->c0 = 5.0E-1; // critical composition - this->noise = 5.0E-3; // noise term for thermal fluctuations + this->iseed = 456; // random number seed + this->kappa = 1.0; // gradient energy coefficient + this->M = 1.0; // mobility + this->c0 = 5.0E-1; // critical composition + this->noise = 5.0E-3; // noise term for thermal fluctuations // set number of dimensions set_ndim(); } - void SimParameters::set_ndim() { ndim = 0; for (int i = 0; i < 3; i++) { - if (nn[i] > 1) ++ndim; + if (nn[i] > 1) { + ++ndim; + } } } - void SimParameters::print() const { - std::cout << " nx = " << nn[0] << std::endl; - std::cout << " ny = " << nn[1] << std::endl; - std::cout << " nz = " << nn[2] << std::endl; - std::cout << " dx = " << delta[0] << std::endl; - std::cout << " dy = " << delta[1] << std::endl; - std::cout << " dz = " << delta[2] << std::endl; - std::cout << " dt = " << dt << std::endl; - std::cout << " num_steps = " << num_steps << std::endl; - std::cout << " print_rate = " << print_rate << std::endl; - std::cout << " iseed = " << iseed << std::endl; - std::cout << " kappa = " << kappa << std::endl; - std::cout << " M = " << M << std::endl; - std::cout << " c0 = " << c0 << std::endl; - std::cout << " noise = " << noise << std::endl; + std::cout << " nx = " << nn[0] << std::endl; + std::cout << " ny = " << nn[1] << std::endl; + std::cout << " nz = " << nn[2] << std::endl; + std::cout << " dx = " << delta[0] << std::endl; + std::cout << " dy = " << delta[1] << std::endl; + std::cout << " dz = " << delta[2] << std::endl; + std::cout << " dt = " << dt << std::endl; + std::cout << " num_steps = " << num_steps << std::endl; + std::cout << " print_rate = " << print_rate << std::endl; + std::cout << " iseed = " << iseed << std::endl; + std::cout << " kappa = " << kappa << std::endl; + std::cout << " M = " << M << std::endl; + std::cout << " c0 = " << c0 << std::endl; + std::cout << " noise = " << noise << std::endl; } diff --git a/examples/phaseFieldMPI/sim_parameters.h b/examples/phaseFieldMPI/sim_parameters.h index 8ef0b091..d9abb89c 100644 --- a/examples/phaseFieldMPI/sim_parameters.h +++ b/examples/phaseFieldMPI/sim_parameters.h @@ -1,25 +1,57 @@ -#pragma once -#include - - -struct SimParameters -{ - std::array nn; - int ndim; - int num_steps; - int print_rate; - int iseed; - double dx; - std::array delta; - double dt; - double kappa; - double M; - double c0; - double noise; - - SimParameters(); - void print() const; - -private: - void set_ndim(); -}; +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#pragma once +#include + +struct SimParameters +{ + std::array nn; + int ndim; + int num_steps; + int print_rate; + int iseed; + double dx; + std::array delta; + double dt; + double kappa; + double M; + double c0; + double noise; + + SimParameters(); + void print() const; + + private: + void set_ndim(); +}; diff --git a/examples/phaseFieldMPI/system.cpp b/examples/phaseFieldMPI/system.cpp index f968c046..7c3f940c 100644 --- a/examples/phaseFieldMPI/system.cpp +++ b/examples/phaseFieldMPI/system.cpp @@ -11,14 +11,14 @@ This program is open source under the BSD-3 License. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - + 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -35,25 +35,23 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************************************/ - #include "system.h" #include "stdlib.h" #include "string" -System::System(MPI_Comm comm_, const SimParameters & sp_) : -comm(comm_), -my_rank(heffte::mpi::comm_rank(comm)), -num_ranks(heffte::mpi::comm_size(comm)), -sp(sp_), -fft(comm, sp.nn), -ga(sp.nn, fft.localRealBoxSizes[my_rank]), -ca(sp, fft.localComplexBoxSizes[my_rank], fft.myComplexBox.low), -total_free_energy_file(NULL), -vtk_writer(comm, fft.globalRealBoxSize, fft.localRealBoxSizes[my_rank], fft.localRealBoxes[my_rank].low, "%12.6E\n") +System::System(MPI_Comm comm_, const SimParameters& sp_) : + comm(comm_), + my_rank(heffte::mpi::comm_rank(comm)), + num_ranks(heffte::mpi::comm_size(comm)), + sp(sp_), + fft(comm, sp.nn), + ga(sp.nn, fft.localRealBoxSizes[my_rank]), + ca(sp, fft.localComplexBoxSizes[my_rank], fft.myComplexBox.low), + total_free_energy_file(NULL), + vtk_writer(comm, fft.globalRealBoxSize, fft.localRealBoxSizes[my_rank], fft.localRealBoxes[my_rank].low, "%12.6E\n") { // print simulation parameters - if (root == my_rank) - { + if (root == my_rank) { sp.print(); total_free_energy_file = fopen("total_free_energy.csv", "w"); } @@ -61,8 +59,7 @@ vtk_writer(comm, fft.globalRealBoxSize, fft.localRealBoxSizes[my_rank], fft.loca System::~System() { - if (root == my_rank) - { + if (root == my_rank) { fclose(total_free_energy_file); } } @@ -71,31 +68,29 @@ void System::initialize_comp() { // start a non-blocking recieve MPI_Request request; - MPI_Status status; + MPI_Status status; MPI_Irecv(ga.comp.host_pointer(), ga.comp.size(), fft.mpiType, root, 999, comm, &request); - - if (root == my_rank) - { + + if (root == my_rank) { // seed random number generator srand(sp.iseed); for (int k = 0; k < ga.comp_all.dims(0); ++k) { - for (int j = 0; j < ga.comp_all.dims(1); ++j) { - for (int i = 0; i < ga.comp_all.dims(2); ++i) { - // random number between 0.0 and 1.0 - double r = (double) rand()/RAND_MAX; - - // initialize "comp" with stochastic thermal fluctuations - ga.comp_all(k,j,i) = sp.c0 + (2.0*r - 1.0)*sp.noise; - } - } + for (int j = 0; j < ga.comp_all.dims(1); ++j) { + for (int i = 0; i < ga.comp_all.dims(2); ++i) { + // random number between 0.0 and 1.0 + double r = (double) rand() / RAND_MAX; + + // initialize "comp" with stochastic thermal fluctuations + ga.comp_all(k, j, i) = sp.c0 + (2.0 * r - 1.0) * sp.noise; + } + } } // send subarrays to ranks MPI_Request requests[num_ranks]; - MPI_Status statuses[num_ranks]; - for (size_t i = 0; i < num_ranks; i++) - { + MPI_Status statuses[num_ranks]; + for (size_t i = 0; i < num_ranks; i++) { MPI_Isend(ga.comp_all.pointer(), 1, fft.mpiSubarrayTypes[i], i, 999, comm, &requests[i]); } @@ -112,15 +107,15 @@ void System::initialize_comp() void System::calculate_dfdc() { - // this function calculates the derivitive of local free energy density (f) + // this function calculates the derivitive of local free energy density (f) // with respect to composition (c) (df/dc). FOR_ALL(k, 0, ga.dfdc.dims(0), j, 0, ga.dfdc.dims(1), i, 0, ga.dfdc.dims(2), { - ga.dfdc(k,j,i) = 4.0 * ga.comp(k,j,i) * ga.comp(k,j,i) * ga.comp(k,j,i) - - 6.0 * ga.comp(k,j,i) * ga.comp(k,j,i) - + 2.0 * ga.comp(k,j,i); + ga.dfdc(k, j, i) = 4.0 * ga.comp(k, j, i) * ga.comp(k, j, i) * ga.comp(k, j, i) + - 6.0 * ga.comp(k, j, i) * ga.comp(k, j, i) + + 2.0 * ga.comp(k, j, i); }); Kokkos::fence(); } @@ -129,30 +124,30 @@ double System::calculate_total_free_energy() { // this function calculates the total free energy of the system. - // unpack simimulation parameters needed + // unpack simimulation parameters needed // for calculations in this function - double dx = sp.delta[0]; - double dy = sp.delta[1]; - double dz = sp.delta[2]; + double dx = sp.delta[0]; + double dy = sp.delta[1]; + double dz = sp.delta[2]; double kappa = sp.kappa; - // + // double total_energy = 0.0; double loc_sum = 0.0; #if 0 // bulk free energy + interfacial energy - REDUCE_SUM(k, 1, ga.comp.dims(0)-1, - j, 1, ga.comp.dims(1)-1, - i, 1, ga.comp.dims(2)-1, + REDUCE_SUM(k, 1, ga.comp.dims(0) - 1, + j, 1, ga.comp.dims(1) - 1, + i, 1, ga.comp.dims(2) - 1, loc_sum, { - // central difference spatial derivative of comp - double dcdz = (ga.comp(k+1,j,i) - ga.comp(k-1,j,i)) / (2.0 * dz); - double dcdy = (ga.comp(i,j+1,k) - ga.comp(i,j-1,k)) / (2.0 * dy); - double dcdx = (ga.comp(k,j,i+1) - ga.comp(k,j,i-1)) / (2.0 * dx); - loc_sum += ga.comp(k,j,i) * ga.comp(k,j,i) - * (1.0 - ga.comp(k,j,i)) * (1.0 - ga.comp(k,j,i)) - + 0.5 * kappa * (dcdx * dcdx + dcdy * dcdy + dcdz * dcdz); + // central difference spatial derivative of comp + double dcdz = (ga.comp(k + 1, j, i) - ga.comp(k - 1, j, i)) / (2.0 * dz); + double dcdy = (ga.comp(i, j + 1, k) - ga.comp(i, j - 1, k)) / (2.0 * dy); + double dcdx = (ga.comp(k, j, i + 1) - ga.comp(k, j, i - 1)) / (2.0 * dx); + loc_sum += ga.comp(k, j, i) * ga.comp(k, j, i) + * (1.0 - ga.comp(k, j, i)) * (1.0 - ga.comp(k, j, i)) + + 0.5 * kappa * (dcdx * dcdx + dcdy * dcdy + dcdz * dcdz); }, total_energy); #endif @@ -161,7 +156,7 @@ double System::calculate_total_free_energy() j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), loc_sum, { - loc_sum += ga.comp(k,j,i) * ga.comp(k,j,i) * (1.0 - ga.comp(k,j,i)) * (1.0 - ga.comp(k,j,i)); + loc_sum += ga.comp(k, j, i) * ga.comp(k, j, i) * (1.0 - ga.comp(k, j, i)) * (1.0 - ga.comp(k, j, i)); }, total_energy); return total_energy; @@ -184,14 +179,14 @@ void System::time_march() FOR_ALL(k, 0, ca.comp_img.dims(0), j, 0, ca.comp_img.dims(1), i, 0, ca.comp_img.dims(2), { - ca.comp_img(k,j,i,0) = (ca.comp_img(k,j,i,0) - (sp.dt * sp.M * ca.kpow2(k,j,i)) * ca.dfdc_img(k,j,i,0)) - / (ca.denominator(k,j,i)); - - ca.comp_img(k,j,i,1) = (ca.comp_img(k,j,i,1) - (sp.dt * sp.M * ca.kpow2(k,j,i)) * ca.dfdc_img(k,j,i,1)) - / (ca.denominator(k,j,i)); + ca.comp_img(k, j, i, 0) = (ca.comp_img(k, j, i, 0) - (sp.dt * sp.M * ca.kpow2(k, j, i)) * ca.dfdc_img(k, j, i, 0)) + / (ca.denominator(k, j, i)); + + ca.comp_img(k, j, i, 1) = (ca.comp_img(k, j, i, 1) - (sp.dt * sp.M * ca.kpow2(k, j, i)) * ca.dfdc_img(k, j, i, 1)) + / (ca.denominator(k, j, i)); }); Kokkos::fence(); - + // get backward fft of comp_img (note fft.backward was set to scale the result already. // you can chnage if needed in FFT3D_R2C class) Profile::start_barrier(Profile::fft_backward); @@ -204,12 +199,12 @@ void System::track_progress(int iter) { // sum of comp field double sum_comp = 0.0; - double loc_sum = 0.0; + double loc_sum = 0.0; REDUCE_SUM(k, 0, ga.comp.dims(0), j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), loc_sum, { - loc_sum += ga.comp(k,j,i); + loc_sum += ga.comp(k, j, i); }, sum_comp); // max of comp field @@ -219,9 +214,9 @@ void System::track_progress(int iter) j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), loc_max, { - if(loc_max < ga.comp(k,j,i)){ - loc_max = ga.comp(k,j,i); - } + if (loc_max < ga.comp(k, j, i)) { + loc_max = ga.comp(k, j, i); + } }, max_comp); // min of comp field @@ -230,10 +225,10 @@ void System::track_progress(int iter) REDUCE_MIN(k, 0, ga.comp.dims(0), j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), - loc_min, { - if(loc_min > ga.comp(k,j,i)){ - loc_min = ga.comp(k,j,i); - } + loc_min, { + if (loc_min > ga.comp(k, j, i)) { + loc_min = ga.comp(k, j, i); + } }, min_comp); double glob_sum_comp = 0.0; @@ -243,8 +238,7 @@ void System::track_progress(int iter) MPI_Reduce(&max_comp, &glob_max_comp, 1, MPI_DOUBLE, MPI_MAX, root, comm); MPI_Reduce(&min_comp, &glob_min_comp, 1, MPI_DOUBLE, MPI_MIN, root, comm); - if (root == my_rank) - { + if (root == my_rank) { printf("\n----------------------------------------------------\n"); printf("Iteration : %d\n", iter); printf("Conservation of comp : %E\n", glob_sum_comp); @@ -261,8 +255,9 @@ void System::output_total_free_energy(int iter) double glob_total_free_energy = 0.0; MPI_Reduce(&total_free_energy, &glob_total_free_energy, 1, MPI_DOUBLE, MPI_SUM, root, comm); - if (root == my_rank) + if (root == my_rank) { fprintf(total_free_energy_file, "%i,%12.6E\n", iter, glob_total_free_energy); + } } void System::solve() @@ -281,7 +276,6 @@ void System::solve() // report simulation progress and output vtk files if (iter % sp.print_rate == 0) { - track_progress(iter); output_total_free_energy(iter); diff --git a/examples/phaseFieldMPI/system.h b/examples/phaseFieldMPI/system.h index 9980c0f3..1308e847 100644 --- a/examples/phaseFieldMPI/system.h +++ b/examples/phaseFieldMPI/system.h @@ -11,14 +11,14 @@ This program is open source under the BSD-3 License. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - + 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -35,7 +35,6 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************************************/ - #include #include "stdio.h" #include "mpi.h" @@ -64,7 +63,7 @@ struct System FILE* total_free_energy_file; VTK_Writer_MPI_IO vtk_writer; - System(MPI_Comm comm_, const SimParameters & sp); + System(MPI_Comm comm_, const SimParameters& sp); ~System(); void initialize_comp(); void check_subarray_mpi_data_exchange(); diff --git a/examples/phaseFieldMPI/vtk_writer_mpi_io.cpp b/examples/phaseFieldMPI/vtk_writer_mpi_io.cpp index 22ab77a7..dbd79d8e 100644 --- a/examples/phaseFieldMPI/vtk_writer_mpi_io.cpp +++ b/examples/phaseFieldMPI/vtk_writer_mpi_io.cpp @@ -1,16 +1,49 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include -VTK_Writer_MPI_IO::VTK_Writer_MPI_IO(MPI_Comm mpi_io_comm, const std::array & dimensions_full_array, - const std::array & dimensions_subarray, - const std::array & start_coordinates, - const char *format) : -mpi_io_comm_(mpi_io_comm), -dimensions_full_array_(dimensions_full_array), -dimensions_subarray_(dimensions_subarray), -start_coordinates_(start_coordinates), -format_(format), -chars_per_num_type_(MPI_DATATYPE_NULL), -file_space_type_(MPI_DATATYPE_NULL) +VTK_Writer_MPI_IO::VTK_Writer_MPI_IO(MPI_Comm mpi_io_comm, const std::array& dimensions_full_array, + const std::array& dimensions_subarray, + const std::array& start_coordinates, + const char* format) : + mpi_io_comm_(mpi_io_comm), + dimensions_full_array_(dimensions_full_array), + dimensions_subarray_(dimensions_subarray), + start_coordinates_(start_coordinates), + format_(format), + chars_per_num_type_(MPI_DATATYPE_NULL), + file_space_type_(MPI_DATATYPE_NULL) { // calculating chars_per_num based on format specified char s[100]; @@ -23,8 +56,8 @@ file_space_type_(MPI_DATATYPE_NULL) // create file_space_type_ int dimensions_full_array_reordered[3] = { dimensions_full_array_[2], dimensions_full_array_[1], dimensions_full_array_[0] }; - int dimensions_subarray_reordered[3] = { dimensions_subarray_[2], dimensions_subarray_[1], dimensions_subarray_[0] }; - int start_coordinates_reordered[3] = { start_coordinates_[2], start_coordinates_[1], start_coordinates_[0] }; + int dimensions_subarray_reordered[3] = { dimensions_subarray_[2], dimensions_subarray_[1], dimensions_subarray_[0] }; + int start_coordinates_reordered[3] = { start_coordinates_[2], start_coordinates_[1], start_coordinates_[0] }; MPI_Type_create_subarray(3, dimensions_full_array_reordered, dimensions_subarray_reordered, start_coordinates_reordered, MPI_ORDER_C, chars_per_num_type_, &file_space_type_); @@ -37,7 +70,7 @@ VTK_Writer_MPI_IO::~VTK_Writer_MPI_IO() MPI_Type_free(&file_space_type_); } -void VTK_Writer_MPI_IO::write(int iter, const double *data) +void VTK_Writer_MPI_IO::write(int iter, const double* data) { // global array dimensions int nx = dimensions_full_array_[0]; @@ -47,10 +80,10 @@ void VTK_Writer_MPI_IO::write(int iter, const double *data) // create name of output vtk file char filename[50]; sprintf(filename, "outputComp_%d.vtk", iter); - + // for storing header_text std::string header_text; - + // write vtk file heading char buff[300]; sprintf(buff, "%s\n", "# vtk DataFile Version 3.0"); @@ -67,7 +100,7 @@ void VTK_Writer_MPI_IO::write(int iter, const double *data) header_text += buff; sprintf(buff, "%s %d %d %d\n", "SPACING", 1, 1, 1); header_text += buff; - sprintf(buff, "%s %d\n", "POINT_DATA", nx*ny*nz); + sprintf(buff, "%s %d\n", "POINT_DATA", nx * ny * nz); header_text += buff; sprintf(buff, "%s\n", "SCALARS data double"); header_text += buff; @@ -75,13 +108,12 @@ void VTK_Writer_MPI_IO::write(int iter, const double *data) header_text += buff; // for holding data converted to chars - const int subarray_size = dimensions_subarray_[0]*dimensions_subarray_[1]*dimensions_subarray_[2]; - char *data_as_chars = new char[subarray_size*chars_per_num_]; + const int subarray_size = dimensions_subarray_[0] * dimensions_subarray_[1] * dimensions_subarray_[2]; + char* data_as_chars = new char[subarray_size * chars_per_num_]; // write data into data_as_chars - for (int i = 0; i < subarray_size; i++) - { - sprintf(&data_as_chars[i*chars_per_num_], format_.c_str(), data[i]); + for (int i = 0; i < subarray_size; i++) { + sprintf(&data_as_chars[i * chars_per_num_], format_.c_str(), data[i]); } write_mpi_io_file(filename, header_text.c_str(), data_as_chars); @@ -89,7 +121,7 @@ void VTK_Writer_MPI_IO::write(int iter, const double *data) delete[] data_as_chars; } -MPI_File VTK_Writer_MPI_IO::create_mpi_io_file(const char *filename) +MPI_File VTK_Writer_MPI_IO::create_mpi_io_file(const char* filename) { int file_mode = MPI_MODE_UNIQUE_OPEN | MPI_MODE_WRONLY | MPI_MODE_CREATE; @@ -107,20 +139,19 @@ MPI_File VTK_Writer_MPI_IO::create_mpi_io_file(const char *filename) return file_handle; } -void VTK_Writer_MPI_IO::write_mpi_io_file(const char *filename, const char* header_text, const char *data_as_chars) +void VTK_Writer_MPI_IO::write_mpi_io_file(const char* filename, const char* header_text, const char* data_as_chars) { int my_rank; MPI_Comm_rank(mpi_io_comm_, &my_rank); - int header_text_size = strlen(header_text); + int header_text_size = strlen(header_text); int data_as_chars_size = strlen(data_as_chars); // open file MPI_File file_handle = create_mpi_io_file(filename); // my_rank == 0 writes header of file - if (my_rank == 0 && header_text_size > 0) - { + if (my_rank == 0 && header_text_size > 0) { MPI_File_write(file_handle, header_text, header_text_size, MPI_CHAR, MPI_STATUS_IGNORE); } MPI_Barrier(mpi_io_comm_); diff --git a/examples/phaseFieldMPI/vtk_writer_mpi_io.h b/examples/phaseFieldMPI/vtk_writer_mpi_io.h index 4759964c..73101618 100644 --- a/examples/phaseFieldMPI/vtk_writer_mpi_io.h +++ b/examples/phaseFieldMPI/vtk_writer_mpi_io.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -6,29 +39,28 @@ #include #include - class VTK_Writer_MPI_IO { private: MPI_Comm mpi_io_comm_; - const std::array dimensions_full_array_; - const std::array dimensions_subarray_; - const std::array start_coordinates_; + const std::array dimensions_full_array_; + const std::array dimensions_subarray_; + const std::array start_coordinates_; std::string format_; int chars_per_num_; MPI_Datatype chars_per_num_type_; MPI_Datatype file_space_type_; - MPI_File create_mpi_io_file(const char *filename); - void write_mpi_io_file(const char *filename, const char* header_text, - const char *data_as_chars); + MPI_File create_mpi_io_file(const char* filename); + void write_mpi_io_file(const char* filename, const char* header_text, + const char* data_as_chars); public: - VTK_Writer_MPI_IO(MPI_Comm mpi_io_comm, const std::array & dimensions_full_array, - const std::array & dimensions_subarray, - const std::array & start_coordinates, - const char *format); + VTK_Writer_MPI_IO(MPI_Comm mpi_io_comm, const std::array& dimensions_full_array, + const std::array& dimensions_subarray, + const std::array& start_coordinates, + const char* format); ~VTK_Writer_MPI_IO(); - void write(int iter, const double *data); + void write(int iter, const double* data); }; diff --git a/examples/random_numbers.cpp b/examples/random_numbers.cpp index 805f201d..7b8bb8c8 100644 --- a/examples/random_numbers.cpp +++ b/examples/random_numbers.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "matar.h" #include // for Kokkos random number generator @@ -6,52 +39,49 @@ // Kokkos provides two random number generator pools one for 64bit states and one for 1024 bit states. // Choose one. -//using gen_t = Kokkos::Random_XorShift64_Pool; +// using gen_t = Kokkos::Random_XorShift64_Pool; using gen_t = Kokkos::Random_XorShift1024_Pool; -int main () +int main() { - Kokkos::initialize(); - { // kokkos scope - - // Seed random number generator - gen_t rand_pool(SEED); - - // DCArrayKokkos type to store the random numbers generated on the device - // and print out on the host - const int N = 100; - DCArrayKokkos arr(N); - - // Generate random numbers - FOR_ALL(i, 0, N, { - - // Get a random number state from the pool for the active thread - gen_t::generator_type rand_gen = rand_pool.get_state(); - - // rand_gen.rand() generates integers from (0,MAX_RAND] - // rand_gen.rand(END) generates integers from (0,END] - // rand_gen.rand(START, END) generates integers from (START,END] - // Note, frand() or drand() can be used in place of rand() to generate floats and - // doubles, respectively. Please check out Kokkos_Random.hpp for all the other type of - // scalars that are supported. - - // generate random numbers in the range (0,10] - arr(i) = rand_gen.rand(10); - - // Give the state back, which will allow another thread to acquire it - rand_pool.free_state(rand_gen); - }); // end FOR_ALL - - // update host - arr.update_host(); - - for (int i = 0; i < N; i++) { - printf(" %d", arr.host(i)); - } - printf("\n"); - - } // end kokkos scope - Kokkos::finalize(); - - return 0; + Kokkos::initialize(); + { // kokkos scope + // Seed random number generator + gen_t rand_pool(SEED); + + // DCArrayKokkos type to store the random numbers generated on the device + // and print out on the host + const int N = 100; + DCArrayKokkos arr(N); + + // Generate random numbers + FOR_ALL(i, 0, N, { + // Get a random number state from the pool for the active thread + gen_t::generator_type rand_gen = rand_pool.get_state(); + + // rand_gen.rand() generates integers from (0,MAX_RAND] + // rand_gen.rand(END) generates integers from (0,END] + // rand_gen.rand(START, END) generates integers from (START,END] + // Note, frand() or drand() can be used in place of rand() to generate floats and + // doubles, respectively. Please check out Kokkos_Random.hpp for all the other type of + // scalars that are supported. + + // generate random numbers in the range (0,10] + arr(i) = rand_gen.rand(10); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + }); // end FOR_ALL + + // update host + arr.update_host(); + + for (int i = 0; i < N; i++) { + printf(" %d", arr.host(i)); + } + printf("\n"); + } // end kokkos scope + Kokkos::finalize(); + + return 0; } diff --git a/examples/sparsetests/matVec.cpp b/examples/sparsetests/matVec.cpp index b068ed30..e5e7f6be 100644 --- a/examples/sparsetests/matVec.cpp +++ b/examples/sparsetests/matVec.cpp @@ -1,152 +1,186 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include -#include +#include #include #define EXPORT true using namespace mtr; // matar namespace -void matVec(CArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b){ - size_t n = A.dims(0); - size_t m = A.dims(1); - FOR_ALL(i, 0, n, - { - for(int j = 0; j < m ; j++){ - b(i) += A(i,j) * v(j); - } - } - ); +void matVec(CArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b) +{ + size_t n = A.dims(0); + size_t m = A.dims(1); + FOR_ALL(i, 0, n, + { + for (int j = 0; j < m ; j++) { + b(i) += A(i, j) * v(j); + } + }); Kokkos::fence(); } -void matVecSparse(CSRArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b){ - size_t m = A.dim2(); - size_t n = A.dim1(); - FOR_ALL(i, 0, n, { - size_t col; - for(auto j = A.begin_index(i); j < A.end_index(i); j++){ - col = A.get_col_flat(j); - b(i) += A(i,col) * v(col); - } - } - ); - Kokkos::fence(); +void matVecSparse(CSRArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b) +{ + size_t m = A.dim2(); + size_t n = A.dim1(); + FOR_ALL(i, 0, n, { + size_t col; + for (auto j = A.begin_index(i); j < A.end_index(i); j++) { + col = A.get_col_flat(j); + b(i) += A(i, col) * v(col); + } + }); + Kokkos::fence(); } - -int main(int argc, char** argv){ +int main(int argc, char** argv) +{ Kokkos::initialize(); { - int nrows = 55; - int ncols = 55; - size_t n; - if(argc != 2){ + int nrows = 55; + int ncols = 55; + size_t n; + if (argc != 2) { printf("Usage is .powerTest using default of 5000\n"); n = 5000; - } else{ - n = (size_t) atoi(argv[1]); - } - nrows = n; - ncols = n; - CArrayKokkos A(nrows, ncols); - - FOR_ALL(i, 0, nrows, + } + else{ + n = (size_t) atoi(argv[1]); + } + nrows = n; + ncols = n; + CArrayKokkos A(nrows, ncols); + + FOR_ALL(i, 0, nrows, j, 0, ncols, { - A(i,j) = 0.0; - }); - CArrayKokkos data(3*nrows); - CArrayKokkos starts(nrows+1); - CArrayKokkos cols(3*nrows); - CArrayKokkos v1(ncols); - CArrayKokkos v2(ncols); - CArrayKokkos b1(nrows); - CArrayKokkos b2(nrows); - - int i; - i = 0; - FOR_ALL(i, 0, ncols,{ - v1(i) = 1; - v2(i) = 1; - b1(i) = 0; - b2(i) = 0; - }); - FOR_ALL(i, 0, nrows,{ - - if(i == nrows -2){ - A(i,i-1) = i ; - A(i,i) = i; - A(i,i+1) = i; - data(3*i) = i; - data(3*i+1) = i; - data(3*i+2) = i; - cols(3*i) = i-1; - cols(3*i+1) = i; - cols(3*i+2) = i+1; - b1(i) = 0; - b2(i) = 0; - starts(i) = 3*i; - } - else if(i == nrows -1){ - A(i,i-2) = i ; - A(i,i-1) = i; - A(i,i) = i; - data(3*i) = i; - data(3*i+1) = i; - data(3*i+2) = i; - cols(3*i) = i-2; - cols(3*i+1) = i-1; - cols(3*i+2) = i; - b1(i) = 0; - b2(i) = 0; - starts(i) = 3*i; - } - else { - A(i,i) = i; - A(i,i+1) = i; - A(i,i+2) = i; - data(3*i) = i ; - data(3*i+1) = i; - data(3*i+2) = i; - cols(3*i) = i; - cols(3*i+1) = i+1; - cols(3*i+2) = i+2; - b1(i) = 0; - b2(i) = 0; - starts(i) = 3*i; - } - }); - RUN({ - starts(0) = 0; - starts(nrows) = 3*nrows; - }); - CSRArrayKokkos B (data, starts, cols, nrows, ncols); - auto start = std::chrono::high_resolution_clock::now(); + A(i, j) = 0.0; + }); + CArrayKokkos data(3 * nrows); + CArrayKokkos starts(nrows + 1); + CArrayKokkos cols(3 * nrows); + CArrayKokkos v1(ncols); + CArrayKokkos v2(ncols); + CArrayKokkos b1(nrows); + CArrayKokkos b2(nrows); - int j; - matVec(A,v1,b1); - Kokkos::fence(); - auto lap1 = std::chrono::high_resolution_clock::now(); - auto lap2 = std::chrono::high_resolution_clock::now(); - matVecSparse(B,v2,b2); - Kokkos::fence(); - auto lap3 = std::chrono::high_resolution_clock::now(); - auto time1 = std::chrono::duration_cast(lap1 - start); - auto time2 = std::chrono::duration_cast(lap3- lap2); + int i; + i = 0; + FOR_ALL(i, 0, ncols, { + v1(i) = 1; + v2(i) = 1; + b1(i) = 0; + b2(i) = 0; + }); + FOR_ALL(i, 0, nrows, { + if (i == nrows - 2) { + A(i, i - 1) = i; + A(i, i) = i; + A(i, i + 1) = i; + data(3 * i) = i; + data(3 * i + 1) = i; + data(3 * i + 2) = i; + cols(3 * i) = i - 1; + cols(3 * i + 1) = i; + cols(3 * i + 2) = i + 1; + b1(i) = 0; + b2(i) = 0; + starts(i) = 3 * i; + } + else if (i == nrows - 1) { + A(i, i - 2) = i; + A(i, i - 1) = i; + A(i, i) = i; + data(3 * i) = i; + data(3 * i + 1) = i; + data(3 * i + 2) = i; + cols(3 * i) = i - 2; + cols(3 * i + 1) = i - 1; + cols(3 * i + 2) = i; + b1(i) = 0; + b2(i) = 0; + starts(i) = 3 * i; + } + else { + A(i, i) = i; + A(i, i + 1) = i; + A(i, i + 2) = i; + data(3 * i) = i; + data(3 * i + 1) = i; + data(3 * i + 2) = i; + cols(3 * i) = i; + cols(3 * i + 1) = i + 1; + cols(3 * i + 2) = i + 2; + b1(i) = 0; + b2(i) = 0; + starts(i) = 3 * i; + } + }); + RUN({ + starts(0) = 0; + starts(nrows) = 3 * nrows; + }); + CSRArrayKokkos B(data, starts, cols, nrows, ncols); + auto start = std::chrono::high_resolution_clock::now(); + + int j; + matVec(A, v1, b1); + Kokkos::fence(); + auto lap1 = std::chrono::high_resolution_clock::now(); + auto lap2 = std::chrono::high_resolution_clock::now(); + matVecSparse(B, v2, b2); + Kokkos::fence(); + auto lap3 = std::chrono::high_resolution_clock::now(); + auto time1 = std::chrono::duration_cast(lap1 - start); + auto time2 = std::chrono::duration_cast(lap3 - lap2); - if(!EXPORT){ + if (!EXPORT) { RUN({ printf("Size: %ld, Dense: %.2e, Sparse: %.2e, %f, %f \n", n, time1.count() * 1e-9, time2.count() * 1e-9, b1(57980), b2(57980) ); }); - } else { - RUN({ - for(int i = 0; i < n; i++){ - if(abs(b1(i) - b2(i) > 1e-7)){ - printf("b1(%d) - b2(%d) = %.2e\n", i, i, b1(i)-b2(i)); - } + } + else { + RUN({ + for (int i = 0; i < n; i++) { + if (abs(b1(i) - b2(i) > 1e-7)) { + printf("b1(%d) - b2(%d) = %.2e\n", i, i, b1(i) - b2(i)); } - printf("%ld, %.2e, %.2e, %f, %f, %f \n", n, time1.count() * 1e-9, time2.count() * 1e-9, (1e-9*time1.count())/(1e-9*time2.count()), b1(25), b2(25) ); - }); - } - }Kokkos::finalize(); + } + printf("%ld, %.2e, %.2e, %f, %f, %f \n", n, time1.count() * 1e-9, time2.count() * 1e-9, (1e-9 * time1.count()) / (1e-9 * time2.count()), b1(25), b2(25) ); + }); + } + } Kokkos::finalize(); } diff --git a/examples/sparsetests/powerIter.cpp b/examples/sparsetests/powerIter.cpp index a9b469a8..270be239 100644 --- a/examples/sparsetests/powerIter.cpp +++ b/examples/sparsetests/powerIter.cpp @@ -1,246 +1,299 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include -#include +#include #include #define EXPORT true using namespace mtr; // matar namespace -void matVec(CArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b){ +void matVec(CArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b) +{ FOR_ALL(i, 0, A.dims(0), - { - size_t m = A.dims(1); - for(int j = 0; j < m ; j++){ - b(i) += A(i,j) * v(j); - } - }); + { + size_t m = A.dims(1); + for (int j = 0; j < m ; j++) { + b(i) += A(i, j) * v(j); + } + }); } -void matVecSp(CSRArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b){ - size_t m = A.dim2(); - size_t n = A.dim1(); - FOR_ALL(i, 0, n, { - size_t col; - for(auto j = A.begin_index(i); j < A.end_index(i); j++){ - col = A.get_col_flat(j); - b(i) += A(i,col) * v(col); - } - } - ); +void matVecSp(CSRArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b) +{ + size_t m = A.dim2(); + size_t n = A.dim1(); + FOR_ALL(i, 0, n, { + size_t col; + for (auto j = A.begin_index(i); j < A.end_index(i); j++) { + col = A.get_col_flat(j); + b(i) += A(i, col) * v(col); + } + }); } - -void renorm(CArrayKokkos &b){ - double total= 0 ; - double loc_total = 0; - int n = b.dims(0); - int i = 0; - REDUCE_SUM(i, 0, n, - loc_total, { loc_total += b(i) * b(i);} - , total ); - total = 1/sqrt(total); - FOR_ALL(i, 0, n, - {b(i) *= total;} - ); - //printf("Norm is %f\n", total); +void renorm(CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + int i = 0; + REDUCE_SUM(i, 0, n, + loc_total, { loc_total += b(i) * b(i); } + , total); + total = 1 / sqrt(total); + FOR_ALL(i, 0, n, + { + b(i) *= total; + }); + // printf("Norm is %f\n", total); } -void copy(CArrayKokkos &a, CArrayKokkos &b){ - int n = b.dims(0); - FOR_ALL(i, 0, n, - {b(i) = a(i); - a(i) = 0; - } - ); +void copy(CArrayKokkos& a, CArrayKokkos& b) +{ + int n = b.dims(0); + FOR_ALL(i, 0, n, + { + b(i) = a(i); + a(i) = 0; + }); } -double innerProd(CArrayKokkos &a, CArrayKokkos &b){ - double total = 0; - double loc_total = 0 ; - int n = b.dims(0); - REDUCE_SUM(i, 0, n, - loc_total, {loc_total += a(i) * b(i);} - , total); - return total; +double innerProd(CArrayKokkos& a, CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + REDUCE_SUM(i, 0, n, + loc_total, { + loc_total += a(i) * b(i); + }, total); + return total; } -double l1Change(CArrayKokkos &a, CArrayKokkos &b){ - double total = 0; - double loc_total = 0 ; - int n = b.dims(0); - REDUCE_SUM(i, 0, n, - loc_total, {loc_total += abs(a(i) - b(i)) ; } - , total); - return total; +double l1Change(CArrayKokkos& a, CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + REDUCE_SUM(i, 0, n, + loc_total, { + loc_total += abs(a(i) - b(i)); + }, total); + return total; } -double powerIter(CArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b, double tol, int max_iter, int& did_converge){ - double last_totl = 4*tol; - double my_tol = 2*tol; - int my_iter = 0; - - while(my_iter < max_iter && my_tol > tol){ - matVec(A, v, b); - renorm(b); - if(my_iter % 100 == 0){ - my_tol = l1Change(b, v); - } - copy(b,v); - my_iter++; - } - matVec(A,v,b); - if(!EXPORT){ - printf("Converged in %d iterations with tol of %f\n", my_iter, my_tol); - } - if(my_iter >= max_iter && my_tol > tol){ - did_converge = 0; - }else{ - did_converge = 1; +double powerIter(CArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b, double tol, int max_iter, int& did_converge) +{ + double last_totl = 4 * tol; + double my_tol = 2 * tol; + int my_iter = 0; + + while (my_iter < max_iter && my_tol > tol) { + matVec(A, v, b); + renorm(b); + if (my_iter % 100 == 0) { + my_tol = l1Change(b, v); } - return innerProd(v, b); -} - -void renormSp(CArrayKokkos &b){ - double total= 0 ; - double loc_total = 0; - int n = b.dims(0); - int i = 0; - REDUCE_SUM(i, 0, n, - loc_total, { loc_total += b(i) * b(i);} - , total ); - total = 1/sqrt(total); - FOR_ALL(i, 0, n, - {b(i) *= total;} - ); - //printf("Norm is %f\n", total); + copy(b, v); + my_iter++; + } + matVec(A, v, b); + if (!EXPORT) { + printf("Converged in %d iterations with tol of %f\n", my_iter, my_tol); + } + if (my_iter >= max_iter && my_tol > tol) { + did_converge = 0; + } + else{ + did_converge = 1; + } + return innerProd(v, b); } -void copySp(CArrayKokkos &a, CArrayKokkos &b){ - int n = b.dims(0); - FOR_ALL(i, 0, n, - {b(i) = a(i); - a(i) = 0; - } - ); +void renormSp(CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + int i = 0; + REDUCE_SUM(i, 0, n, + loc_total, { loc_total += b(i) * b(i); } + , total); + total = 1 / sqrt(total); + FOR_ALL(i, 0, n, + { + b(i) *= total; + }); + // printf("Norm is %f\n", total); } -double innerProdSp(CArrayKokkos &a, CArrayKokkos &b){ - double total = 0; - double loc_total = 0 ; - int n = b.dims(0); - REDUCE_SUM(i, 0, n, - loc_total, {loc_total += a(i) * b(i);} - , total); - return total; +void copySp(CArrayKokkos& a, CArrayKokkos& b) +{ + int n = b.dims(0); + FOR_ALL(i, 0, n, + { + b(i) = a(i); + a(i) = 0; + }); } -double l1ChangeSp(CArrayKokkos &a, CArrayKokkos &b){ - double total = 0; - double loc_total = 0 ; - int n = b.dims(0); - REDUCE_SUM(i, 0, n, - loc_total, {loc_total += abs(a(i) - b(i)) ; } - , total); - return total; +double innerProdSp(CArrayKokkos& a, CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + REDUCE_SUM(i, 0, n, + loc_total, { + loc_total += a(i) * b(i); + }, total); + return total; } -double powerIterSp(CSRArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b, double tol, int max_iter, int &did_converge){ - double last_totl = 4*tol; - double my_tol = 2*tol; - int my_iter = 0; - - while(my_iter < max_iter && my_tol > tol){ - matVecSp(A, v, b); - renormSp(b); - if(my_iter % 100 == 0){ - my_tol = l1ChangeSp(b, v); - } - copySp(b,v); - my_iter++; - } - matVecSp(A,v,b); - if(!EXPORT){ - printf("Converged in %d iterations with tol of %f\n", my_iter, my_tol); - } - if(my_iter >= max_iter && my_tol > tol){ - did_converge = 0; - }else{ - did_converge = 1; +double l1ChangeSp(CArrayKokkos& a, CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + REDUCE_SUM(i, 0, n, + loc_total, { + loc_total += abs(a(i) - b(i)); + }, total); + return total; +} + +double powerIterSp(CSRArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b, double tol, int max_iter, int& did_converge) +{ + double last_totl = 4 * tol; + double my_tol = 2 * tol; + int my_iter = 0; + + while (my_iter < max_iter && my_tol > tol) { + matVecSp(A, v, b); + renormSp(b); + if (my_iter % 100 == 0) { + my_tol = l1ChangeSp(b, v); } - return innerProdSp(v, b); -} + copySp(b, v); + my_iter++; + } + matVecSp(A, v, b); + if (!EXPORT) { + printf("Converged in %d iterations with tol of %f\n", my_iter, my_tol); + } + if (my_iter >= max_iter && my_tol > tol) { + did_converge = 0; + } + else{ + did_converge = 1; + } + return innerProdSp(v, b); +} -int main(int argc, char** argv){ - +int main(int argc, char** argv) +{ Kokkos::initialize(); { size_t n; - if(argc != 2){ + if (argc != 2) { printf("Usage is .powerTest using default of 5000\n"); n = 5000; - } else{ - n = (size_t) atoi(argv[1]); } - CArrayKokkos A(n,n); + else{ + n = (size_t) atoi(argv[1]); + } + CArrayKokkos A(n, n); CArrayKokkos v(n); CArrayKokkos b1(n); CArrayKokkos b2(n); CArrayKokkos v1(n); - CArrayKokkos data(3*n-2); - CArrayKokkos starts(n+1); - CArrayKokkos cols(3*n-2); - double eig1 = 0; - double eig2 = 0; + CArrayKokkos data(3 * n - 2); + CArrayKokkos starts(n + 1); + CArrayKokkos cols(3 * n - 2); + double eig1 = 0; + double eig2 = 0; double my_tol = n * (1e-07); - int t1 = 1; - int t2 = 1; - FOR_ALL(i, 0, n, - { - v(i) = 1; - v1(i) = 1; - b1(i) = 0; - b2(i) = 0; - if(i == 1){ - starts(i) = 2; - } else if(i==0){ - starts(i) = 0; - } else{ - starts(i) = 2 + 3*(i-1); - } - }); - RUN({ starts(n) = 3*n-2; }); + int t1 = 1; + int t2 = 1; FOR_ALL(i, 0, n, - j, 0, n,{ - if(abs(i - j) <= 1){ - A(i,j) = i + 2* j; - if(i == 0){ - data(i+j) = i + 2*j; - cols(i+j) = j; - } else { - data(3*(i-1) + 3 + j - i ) = i + 2*j; - cols(3*(i-1) + 3 + j - i) = j; - - } - } - }); - CSRArrayKokkos Asp(data, starts, cols, n,n); + { + v(i) = 1; + v1(i) = 1; + b1(i) = 0; + b2(i) = 0; + if (i == 1) { + starts(i) = 2; + } + else if (i == 0) { + starts(i) = 0; + } + else{ + starts(i) = 2 + 3 * (i - 1); + } + }); + RUN({ starts(n) = 3 * n - 2; }); + FOR_ALL(i, 0, n, + j, 0, n, { + if (abs(i - j) <= 1) { + A(i, j) = i + 2 * j; + if (i == 0) { + data(i + j) = i + 2 * j; + cols(i + j) = j; + } + else { + data(3 * (i - 1) + 3 + j - i) = i + 2 * j; + cols(3 * (i - 1) + 3 + j - i) = j; + } + } + }); + CSRArrayKokkos Asp(data, starts, cols, n, n); auto start = std::chrono::high_resolution_clock::now(); - eig1 = powerIter(A, v, b1, my_tol, 3000, t1); + eig1 = powerIter(A, v, b1, my_tol, 3000, t1); auto lap = std::chrono::high_resolution_clock::now(); eig2 = powerIterSp(Asp, v1, b2, my_tol, 3000, t2); auto lap2 = std::chrono::high_resolution_clock::now(); - if(!EXPORT){ + if (!EXPORT) { printf("Max eig is %f %f\n", eig1, eig2); printf("Dense took %.2e \n Sparse took %.2e\n", std::chrono::duration_cast(lap - start) * 1e-9, std::chrono::duration_cast(lap2 - lap) * 1e-9); - } else { - printf("%ld, %.2e, %.2e, %d, %d, %f, %f\n", n, std::chrono::duration_cast(lap - start) * 1e-9, std::chrono::duration_cast(lap2 - lap) * 1e-9, t1, t2, eig1, eig2); + } + else { + printf("%ld, %.2e, %.2e, %d, %d, %f, %f\n", n, std::chrono::duration_cast(lap - start) * 1e-9, + std::chrono::duration_cast(lap2 - lap) * 1e-9, t1, t2, eig1, eig2); } } Kokkos::finalize(); - return 0 ; + return 0; } - - - diff --git a/examples/sparsetests/sparseMatVec.cpp b/examples/sparsetests/sparseMatVec.cpp index 6dd1173d..124e3851 100644 --- a/examples/sparsetests/sparseMatVec.cpp +++ b/examples/sparsetests/sparseMatVec.cpp @@ -1,117 +1,151 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include -#include +#include #include -#define EXPORT true +#define EXPORT true using namespace mtr; // matar namespace -void matVecSparse(CSRArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b){ - size_t m = A.dim2(); - size_t n = A.dim1(); - FOR_ALL(i, 0, n, { - size_t col; - for(auto j = A.begin_index(i); j < A.end_index(i); j++){ - col = A.get_col_flat(j); - b(i) += A(i,col) * v(col); - } - } - ); - Kokkos::fence(); +void matVecSparse(CSRArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b) +{ + size_t m = A.dim2(); + size_t n = A.dim1(); + FOR_ALL(i, 0, n, { + size_t col; + for (auto j = A.begin_index(i); j < A.end_index(i); j++) { + col = A.get_col_flat(j); + b(i) += A(i, col) * v(col); + } + }); + Kokkos::fence(); } - -int main(int argc, char** argv){ +int main(int argc, char** argv) +{ Kokkos::initialize(); { - int nrows = 55; - int ncols = 55; - size_t n; - if(argc != 2){ + int nrows = 55; + int ncols = 55; + size_t n; + if (argc != 2) { printf("Usage is .powerTest using default of 5000\n"); n = 5000; - } else{ - n = (size_t) atoi(argv[1]); - } - nrows = n; - ncols = n; - CArrayKokkos data(3*nrows); - CArrayKokkos starts(nrows+1); - CArrayKokkos cols(3*nrows); - CArrayKokkos v1(ncols); - CArrayKokkos v2(ncols); - CArrayKokkos b1(nrows); - CArrayKokkos b2(nrows); - - int i; - i = 0; - FOR_ALL(i, 0, ncols,{ - v1(i) = 1; - v2(i) = 1; - b1(i) = 0; - b2(i) = 0; - }); - FOR_ALL(i, 0, nrows,{ - - if(i == nrows -2){ - data(3*i) = i; - data(3*i+1) = i; - data(3*i+2) = i; - cols(3*i) = i-1; - cols(3*i+1) = i; - cols(3*i+2) = i+1; - b1(i) = 0; - b2(i) = 0; - starts(i) = 3*i; - } - else if(i == nrows -1){ - data(3*i) = i; - data(3*i+1) = i; - data(3*i+2) = i; - cols(3*i) = i-2; - cols(3*i+1) = i-1; - cols(3*i+2) = i; - b1(i) = 0; - b2(i) = 0; - starts(i) = 3*i; - } - else { - data(3*i) = i ; - data(3*i+1) = i; - data(3*i+2) = i; - cols(3*i) = i; - cols(3*i+1) = i+1; - cols(3*i+2) = i+2; - b1(i) = 0; - b2(i) = 0; - starts(i) = 3*i; - } - }); - RUN({ - starts(0) = 0; - starts(nrows) = 3*nrows; - }); - CSRArrayKokkos B (data, starts, cols, nrows, ncols); - auto start = std::chrono::high_resolution_clock::now(); + } + else{ + n = (size_t) atoi(argv[1]); + } + nrows = n; + ncols = n; + CArrayKokkos data(3 * nrows); + CArrayKokkos starts(nrows + 1); + CArrayKokkos cols(3 * nrows); + CArrayKokkos v1(ncols); + CArrayKokkos v2(ncols); + CArrayKokkos b1(nrows); + CArrayKokkos b2(nrows); - matVecSparse(B,v2,b2); - Kokkos::fence(); - auto lap1 = std::chrono::high_resolution_clock::now(); - auto time1 = std::chrono::duration_cast(lap1 - start); + int i; + i = 0; + FOR_ALL(i, 0, ncols, { + v1(i) = 1; + v2(i) = 1; + b1(i) = 0; + b2(i) = 0; + }); + FOR_ALL(i, 0, nrows, { + if (i == nrows - 2) { + data(3 * i) = i; + data(3 * i + 1) = i; + data(3 * i + 2) = i; + cols(3 * i) = i - 1; + cols(3 * i + 1) = i; + cols(3 * i + 2) = i + 1; + b1(i) = 0; + b2(i) = 0; + starts(i) = 3 * i; + } + else if (i == nrows - 1) { + data(3 * i) = i; + data(3 * i + 1) = i; + data(3 * i + 2) = i; + cols(3 * i) = i - 2; + cols(3 * i + 1) = i - 1; + cols(3 * i + 2) = i; + b1(i) = 0; + b2(i) = 0; + starts(i) = 3 * i; + } + else { + data(3 * i) = i; + data(3 * i + 1) = i; + data(3 * i + 2) = i; + cols(3 * i) = i; + cols(3 * i + 1) = i + 1; + cols(3 * i + 2) = i + 2; + b1(i) = 0; + b2(i) = 0; + starts(i) = 3 * i; + } + }); + RUN({ + starts(0) = 0; + starts(nrows) = 3 * nrows; + }); + CSRArrayKokkos B(data, starts, cols, nrows, ncols); + auto start = std::chrono::high_resolution_clock::now(); - if(!EXPORT){ - RUN({ printf("Size: %ld, Sparse: %.2e, %f, %f \n", n, time1.count() * 1e-9 , b1(57980), b2(57980) ); }); - } else { - RUN({ - for(int i = 0; i < n; i++){ - if(abs(b1(i) - b2(i) > 1e-7)){ - printf("b1(%d) - b2(%d) = %.2e\n", i, i, b1(i)-b2(i)); - } + matVecSparse(B, v2, b2); + Kokkos::fence(); + auto lap1 = std::chrono::high_resolution_clock::now(); + auto time1 = std::chrono::duration_cast(lap1 - start); + + if (!EXPORT) { + RUN({ printf("Size: %ld, Sparse: %.2e, %f, %f \n", n, time1.count() * 1e-9, b1(57980), b2(57980) ); }); + } + else { + RUN({ + for (int i = 0; i < n; i++) { + if (abs(b1(i) - b2(i) > 1e-7)) { + printf("b1(%d) - b2(%d) = %.2e\n", i, i, b1(i) - b2(i)); } - printf("%ld, %.2e, %.2e, %f, %f, %f \n", n, time1.count() * 1e-9, b1(25), b2(25) ); - }); - } - }Kokkos::finalize(); + } + printf("%ld, %.2e, %.2e, %f, %f, %f \n", n, time1.count() * 1e-9, b1(25), b2(25) ); + }); + } + } Kokkos::finalize(); } diff --git a/examples/sparsetests/sparsePowerIter.cpp b/examples/sparsetests/sparsePowerIter.cpp index 5926774e..5d066f2b 100644 --- a/examples/sparsetests/sparsePowerIter.cpp +++ b/examples/sparsetests/sparsePowerIter.cpp @@ -1,161 +1,203 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include #include -#include +#include #include #define EXPORT true using namespace mtr; // matar namespace - -void matVecSp(CSRArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b){ - size_t m = A.dim2(); - size_t n = A.dim1(); - FOR_ALL(i, 0, n, { - size_t col; - for(auto j = A.begin_index(i); j < A.end_index(i); j++){ - col = A.get_col_flat(j); - b(i) += A(i,col) * v(col); - } - } - ); -} +void matVecSp(CSRArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b) +{ + size_t m = A.dim2(); + size_t n = A.dim1(); + FOR_ALL(i, 0, n, { + size_t col; + for (auto j = A.begin_index(i); j < A.end_index(i); j++) { + col = A.get_col_flat(j); + b(i) += A(i, col) * v(col); + } + }); +} -void renormSp(CArrayKokkos &b){ - double total= 0 ; - double loc_total = 0; - int n = b.dims(0); - int i = 0; - REDUCE_SUM(i, 0, n, - loc_total, { loc_total += b(i) * b(i);} - , total ); - total = 1/sqrt(total); - FOR_ALL(i, 0, n, - {b(i) *= total;} - ); - //printf("Norm is %f\n", total); +void renormSp(CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + int i = 0; + REDUCE_SUM(i, 0, n, + loc_total, { loc_total += b(i) * b(i); } + , total); + total = 1 / sqrt(total); + FOR_ALL(i, 0, n, + { + b(i) *= total; + }); + // printf("Norm is %f\n", total); } -void copySp(CArrayKokkos &a, CArrayKokkos &b){ - int n = b.dims(0); - FOR_ALL(i, 0, n, - {b(i) = a(i); - a(i) = 0; - } - ); +void copySp(CArrayKokkos& a, CArrayKokkos& b) +{ + int n = b.dims(0); + FOR_ALL(i, 0, n, + { + b(i) = a(i); + a(i) = 0; + }); } -double innerProdSp(CArrayKokkos &a, CArrayKokkos &b){ - double total = 0; - double loc_total = 0 ; - int n = b.dims(0); - REDUCE_SUM(i, 0, n, - loc_total, {loc_total += a(i) * b(i);} - , total); - return total; +double innerProdSp(CArrayKokkos& a, CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + REDUCE_SUM(i, 0, n, + loc_total, { + loc_total += a(i) * b(i); + }, total); + return total; } -double l1ChangeSp(CArrayKokkos &a, CArrayKokkos &b){ - double total = 0; - double loc_total = 0 ; - int n = b.dims(0); - REDUCE_SUM(i, 0, n, - loc_total, {loc_total += abs(a(i) - b(i)) ; } - , total); - return total; +double l1ChangeSp(CArrayKokkos& a, CArrayKokkos& b) +{ + double total = 0; + double loc_total = 0; + int n = b.dims(0); + REDUCE_SUM(i, 0, n, + loc_total, { + loc_total += abs(a(i) - b(i)); + }, total); + return total; } -double powerIterSp(CSRArrayKokkos &A, CArrayKokkos &v, CArrayKokkos &b, double tol, int max_iter, int &did_converge){ - double last_totl = 4*tol; - double my_tol = 2*tol; - int my_iter = 0; - - while(my_iter < max_iter && my_tol > tol){ - matVecSp(A, v, b); - renormSp(b); - if(my_iter % 100 == 0){ - my_tol = l1ChangeSp(b, v); - } - copySp(b,v); - my_iter++; - } - matVecSp(A,v,b); - if(!EXPORT){ - printf("Converged in %d iterations with tol of %f\n", my_iter, my_tol); - } - if(my_iter >= max_iter && my_tol > tol){ - did_converge = 0; - }else{ - did_converge = 1; +double powerIterSp(CSRArrayKokkos& A, CArrayKokkos& v, CArrayKokkos& b, double tol, int max_iter, int& did_converge) +{ + double last_totl = 4 * tol; + double my_tol = 2 * tol; + int my_iter = 0; + + while (my_iter < max_iter && my_tol > tol) { + matVecSp(A, v, b); + renormSp(b); + if (my_iter % 100 == 0) { + my_tol = l1ChangeSp(b, v); } - return innerProdSp(v, b); -} + copySp(b, v); + my_iter++; + } + matVecSp(A, v, b); + if (!EXPORT) { + printf("Converged in %d iterations with tol of %f\n", my_iter, my_tol); + } + if (my_iter >= max_iter && my_tol > tol) { + did_converge = 0; + } + else{ + did_converge = 1; + } + return innerProdSp(v, b); +} -int main(int argc, char** argv){ - +int main(int argc, char** argv) +{ Kokkos::initialize(); { size_t n; - if(argc != 2){ + if (argc != 2) { printf("Usage is .powerTest using default of 5000\n"); n = 5000; - } else{ - n = (size_t) atoi(argv[1]); + } + else{ + n = (size_t) atoi(argv[1]); } CArrayKokkos v(n); CArrayKokkos b1(n); CArrayKokkos b2(n); CArrayKokkos v1(n); - CArrayKokkos data(3*n-2); - CArrayKokkos starts(n+1); - CArrayKokkos cols(3*n-2); - double eig1 = 0; - double eig2 = 0; + CArrayKokkos data(3 * n - 2); + CArrayKokkos starts(n + 1); + CArrayKokkos cols(3 * n - 2); + double eig1 = 0; + double eig2 = 0; double my_tol = n * (1e-09); - int t1 = 1; - int t2 = 1; - FOR_ALL(i, 0, n, - { - v(i) = 1; - v1(i) = 1; - b1(i) = 0; - b2(i) = 0; - if(i == 1){ - starts(i) = 2; - } else if(i==0){ - starts(i) = 0; - } else{ - starts(i) = 2 + 3*(i-1); - } - }); - RUN({ starts(n) = 3*n-2; }); + int t1 = 1; + int t2 = 1; + FOR_ALL(i, 0, n, + { + v(i) = 1; + v1(i) = 1; + b1(i) = 0; + b2(i) = 0; + if (i == 1) { + starts(i) = 2; + } + else if (i == 0) { + starts(i) = 0; + } + else{ + starts(i) = 2 + 3 * (i - 1); + } + }); + RUN({ starts(n) = 3 * n - 2; }); FOR_ALL(i, 0, n, - j, 0, n,{ - if(abs(i - j) <= 1){ - if(i == 0){ - data(i+j) = i + 2*j; - cols(i+j) = j; - } else { - data(3*(i-1) + 3 + j - i ) = i + 2*j; - cols(3*(i-1) + 3 + j - i) = j; - - } - } - }); - CSRArrayKokkos Asp(data, starts, cols, n,n); + j, 0, n, { + if (abs(i - j) <= 1) { + if (i == 0) { + data(i + j) = i + 2 * j; + cols(i + j) = j; + } + else { + data(3 * (i - 1) + 3 + j - i) = i + 2 * j; + cols(3 * (i - 1) + 3 + j - i) = j; + } + } + }); + CSRArrayKokkos Asp(data, starts, cols, n, n); auto start = std::chrono::high_resolution_clock::now(); eig2 = powerIterSp(Asp, v1, b2, my_tol, 10000000, t2); auto lap = std::chrono::high_resolution_clock::now(); - if(!EXPORT){ + if (!EXPORT) { printf("Max eig is %f %f\n", eig1, eig2); printf("Sparse took %.2e\n", std::chrono::duration_cast(lap - start) * 1e-9); - } else { - printf("%ld, %.2e, %d, %f\n", n, std::chrono::duration_cast(lap - start) * 1e-9, t2, eig2); + } + else { + printf("%ld, %.2e, %d, %f\n", n, std::chrono::duration_cast(lap - start) * 1e-9, t2, eig2); } } Kokkos::finalize(); - return 0 ; + return 0; } - - - diff --git a/examples/test_dual_types.cpp b/examples/test_dual_types.cpp index 8a3404bb..9e998904 100644 --- a/examples/test_dual_types.cpp +++ b/examples/test_dual_types.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include @@ -9,22 +42,19 @@ using namespace mtr; // matar namespace void DViewCArrayKokkosTwoDimensionExample(); void DCArrayKokkosTwoDimensionExample(); -int main() { - +int main() +{ Kokkos::initialize(); - { - - // Run DViewCArrayKokkos 2D example - DViewCArrayKokkosTwoDimensionExample(); - - // Run DCArrayKokkos 2D example - DCArrayKokkosTwoDimensionExample(); + { + // Run DViewCArrayKokkos 2D example + DViewCArrayKokkosTwoDimensionExample(); + // Run DCArrayKokkos 2D example + DCArrayKokkosTwoDimensionExample(); } // end of kokkos scope Kokkos::finalize(); } - void DViewCArrayKokkosTwoDimensionExample() { printf("\n====================Running 2D DViewCArrayKokkos example====================\n"); @@ -33,35 +63,35 @@ void DViewCArrayKokkosTwoDimensionExample() int ny = 2; // CPU arr - int arr[nx*ny]; - - for (int i = 0; i < nx*ny; i++){ + int arr[nx * ny]; + + for (int i = 0; i < nx * ny; i++) { arr[i] = 1; } // Create A_2D - auto A_2D = DViewCArrayKokkos (&arr[0], nx, ny); - + auto A_2D = DViewCArrayKokkos(&arr[0], nx, ny); + // Print host copy of data printf("Printing host copy of data (should be all 1s):\n"); - for (int i = 0; i < nx; i++){ - for (int j = 0; j < ny; j++){ - printf("%d\n", A_2D.host(i,j)); + for (int i = 0; i < nx; i++) { + for (int j = 0; j < ny; j++) { + printf("%d\n", A_2D.host(i, j)); } } - + // Print device copy of data printf("Printing device copy of data (should be all 1s):\n"); FOR_ALL(i, 0, nx, j, 0, ny, { - printf("%d\n", A_2D(i,j)); + printf("%d\n", A_2D(i, j)); }); Kokkos::fence(); // Manupulate data on device and update host FOR_ALL(i, 0, nx, j, 0, ny, { - A_2D(i,j) = 2; + A_2D(i, j) = 2; }); A_2D.update_host(); Kokkos::fence(); @@ -69,9 +99,9 @@ void DViewCArrayKokkosTwoDimensionExample() // Print host copy of data printf("Printing host copy of data (should be all 2s):\n"); - for (int i = 0; i < nx; i++){ - for (int j = 0; j < ny; j++){ - printf("%d\n", A_2D.host(i,j)); + for (int i = 0; i < nx; i++) { + for (int j = 0; j < ny; j++) { + printf("%d\n", A_2D.host(i, j)); } } @@ -79,7 +109,7 @@ void DViewCArrayKokkosTwoDimensionExample() printf("Printing device copy of data (should be all 2s):\n"); FOR_ALL(i, 0, nx, j, 0, ny, { - printf("%d\n", A_2D(i,j)); + printf("%d\n", A_2D(i, j)); }); Kokkos::fence(); @@ -91,8 +121,6 @@ void DViewCArrayKokkosTwoDimensionExample() printf("Device data pointer: %p\n", A_2D.device_pointer()); } - - void DCArrayKokkosTwoDimensionExample() { printf("\n====================Running 2D DCArrayKokkos example====================\n"); @@ -101,12 +129,12 @@ void DCArrayKokkosTwoDimensionExample() int ny = 2; // Create A_2D - auto A_2D = DCArrayKokkos (nx, ny); + auto A_2D = DCArrayKokkos(nx, ny); // Set data to one on host and updata device - for (int i = 0; i < nx; i++){ - for (int j = 0; j < ny; j++){ - A_2D.host(i,j) = 1; + for (int i = 0; i < nx; i++) { + for (int j = 0; j < ny; j++) { + A_2D.host(i, j) = 1; } } A_2D.update_device(); @@ -114,24 +142,24 @@ void DCArrayKokkosTwoDimensionExample() // Print host copy of data printf("Printing host copy of data (should be all 1s):\n"); - for (int i = 0; i < nx; i++){ - for (int j = 0; j < ny; j++){ - printf("%d\n", A_2D.host(i,j)); + for (int i = 0; i < nx; i++) { + for (int j = 0; j < ny; j++) { + printf("%d\n", A_2D.host(i, j)); } } - + // Print device copy of data printf("Printing device copy of data (should be all 1s):\n"); FOR_ALL(i, 0, nx, j, 0, ny, { - printf("%d\n", A_2D(i,j)); + printf("%d\n", A_2D(i, j)); }); Kokkos::fence(); // Manupulate data on device and update host FOR_ALL(i, 0, nx, j, 0, ny, { - A_2D(i,j) = 2; + A_2D(i, j) = 2; }); A_2D.update_host(); Kokkos::fence(); @@ -139,9 +167,9 @@ void DCArrayKokkosTwoDimensionExample() // Print host copy of data printf("Printing host copy of data (should be all 2s):\n"); - for (int i = 0; i < nx; i++){ - for (int j = 0; j < ny; j++){ - printf("%d\n", A_2D.host(i,j)); + for (int i = 0; i < nx; i++) { + for (int j = 0; j < ny; j++) { + printf("%d\n", A_2D.host(i, j)); } } @@ -149,7 +177,7 @@ void DCArrayKokkosTwoDimensionExample() printf("Printing device copy of data (should be all 2s):\n"); FOR_ALL(i, 0, nx, j, 0, ny, { - printf("%d\n", A_2D(i,j)); + printf("%d\n", A_2D(i, j)); }); Kokkos::fence(); @@ -159,5 +187,4 @@ void DCArrayKokkosTwoDimensionExample() printf("Should be different addresses if using GPU backend.\n"); printf("Host data pointer: %p\n", A_2D.host_pointer()); printf("Device data pointer: %p\n", A_2D.device_pointer()); - } diff --git a/examples/test_for.cpp b/examples/test_for.cpp index 8dd9e41a..a4acab89 100644 --- a/examples/test_for.cpp +++ b/examples/test_for.cpp @@ -1,4 +1,36 @@ - +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -8,123 +40,111 @@ using namespace mtr; // matar namespace // main -int main(){ - +int main() +{ // A view example int A[10]; - ViewCArray arr(A,10); - FOR_ALL (i, 0, 10, { + ViewCArray arr(A, 10); + FOR_ALL(i, 0, 10, { arr(i) = 314; }); - // A 2D array example - CArray arr_2D(10,10); - FOR_ALL (i, 0, 10, - j, 0, 10,{ - arr_2D(i,j) = 314; + CArray arr_2D(10, 10); + FOR_ALL(i, 0, 10, + j, 0, 10, { + arr_2D(i, j) = 314; }); - // A 3D array example - CArray arr_3D(10,10,10); - FOR_ALL (i, 0, 10, + CArray arr_3D(10, 10, 10); + FOR_ALL(i, 0, 10, j, 0, 10, - k, 0, 10,{ - arr_3D(i,j,k) = 314; + k, 0, 10, { + arr_3D(i, j, k) = 314; }); - int loc_sum = 0; - int result = 0; + int result = 0; REDUCE_SUM(i, 0, 10, loc_sum, { - loc_sum += arr(i)*arr(i); + loc_sum += arr(i) * arr(i); }, result); - + // testing loc_sum = 0; - for (int i=0; i<10; i++){ - loc_sum += 314*314; + for (int i = 0; i < 10; i++) { + loc_sum += 314 * 314; } std::cout << "1D reduce : " << result << " vs. " << loc_sum << " \n"; - - + loc_sum = 0; - result = 0; + result = 0; REDUCE_SUM(i, 0, 10, j, 0, 10, loc_sum, { - loc_sum += arr_2D(i,j)*arr_2D(i,j); - }, result); - + loc_sum += arr_2D(i, j) * arr_2D(i, j); + }, result); + // testing loc_sum = 0; - for (int i=0; i<10; i++){ - for (int j=0; j<10; j++){ - loc_sum += 314*314; + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + loc_sum += 314 * 314; } } std::cout << "2D reduce : " << result << " vs. " << loc_sum << " \n"; - - + loc_sum = 0; - result = 0; + result = 0; REDUCE_SUM(i, 0, 10, j, 0, 10, k, 0, 10, loc_sum, { - loc_sum += arr_3D(i,j,k)*arr_3D(i,j,k); - }, result); - + loc_sum += arr_3D(i, j, k) * arr_3D(i, j, k); + }, result); + // testing loc_sum = 0; - for (int i=0; i<10; i++){ - for (int j=0; j<10; j++){ - for (int k=0; k<10; k++){ - loc_sum += 314*314; + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + for (int k = 0; k < 10; k++) { + loc_sum += 314 * 314; } } } std::cout << "3D reduce : " << result << " vs. " << loc_sum << " \n"; - - + int loc_max; REDUCE_MAX(i, 0, 10, j, 0, 10, k, 0, 10, loc_max, { - loc_max = std::max(arr_3D(i,j,k), loc_max); - }, result); - + loc_max = std::max(arr_3D(i, j, k), loc_max); + }, result); + std::cout << "3D reduce MAX : " << result << " \n"; - - - + int loc_min; REDUCE_MIN(i, 0, 10, j, 0, 10, k, 0, 10, loc_min, { - loc_min = std::min(arr_3D(i,j,k), loc_min); - }, result); - + loc_min = std::min(arr_3D(i, j, k), loc_min); + }, result); + std::cout << "3D reduce MIN : " << result << " \n"; - - + REDUCE_MIN_CLASS(i, 0, 10, j, 0, 10, k, 0, 10, loc_min, { - loc_min = std::min(arr_3D(i,j,k), loc_min); - }, result); - + loc_min = std::min(arr_3D(i, j, k), loc_min); + }, result); + std::cout << "3D reduce MIN CLASS : " << result << " \n"; - - + std::cout << "done" << std::endl; return 0; } - - diff --git a/examples/test_rocm/SomeClass.cpp b/examples/test_rocm/SomeClass.cpp index 7d71e45a..500c68d0 100644 --- a/examples/test_rocm/SomeClass.cpp +++ b/examples/test_rocm/SomeClass.cpp @@ -1,5 +1,39 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include "SomeClass.h" KOKKOS_FUNCTION void SomeClass::some_func() const -{} +{ +} diff --git a/examples/test_rocm/SomeClass.h b/examples/test_rocm/SomeClass.h index 791e3445..fa9e90db 100644 --- a/examples/test_rocm/SomeClass.h +++ b/examples/test_rocm/SomeClass.h @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #pragma once #include "matar.h" diff --git a/examples/test_rocm/main.cpp b/examples/test_rocm/main.cpp index 277c925c..158942a1 100644 --- a/examples/test_rocm/main.cpp +++ b/examples/test_rocm/main.cpp @@ -1,10 +1,43 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include "matar.h" #include "SomeClass.h" int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); - { + { FOR_ALL(i, 0, 10, { SomeClass s; s.some_func(); diff --git a/examples/test_shared_ptr.cpp b/examples/test_shared_ptr.cpp index 694e5b46..4c6fccc4 100644 --- a/examples/test_shared_ptr.cpp +++ b/examples/test_shared_ptr.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "matar.h" @@ -5,33 +38,32 @@ using namespace mtr; // matar namespace int N = 2; -using ArrayType = CArray ; +using ArrayType = CArray; ArrayType func() { - ArrayType A (N); + ArrayType A(N); - for (int i = 0; i < N; i++) { - A(i) = 2; - } - printf("Pointer of A in func = %p\n", A.pointer()); - for (int i = 0; i < N; i++) { - printf("Value of A(%d) in func = %d\n", i, A(i)); - } + for (int i = 0; i < N; i++) { + A(i) = 2; + } + printf("Pointer of A in func = %p\n", A.pointer()); + for (int i = 0; i < N; i++) { + printf("Value of A(%d) in func = %d\n", i, A(i)); + } - return A; + return A; } +int main() +{ + auto B = func(); -int main() { - - auto B = func(); + printf("\n"); + printf("Pointer of B in main = %p\n", B.pointer()); + for (int i = 0; i < N; i++) { + printf("Value of B(%d) in main = %d\n", i, B(i)); + } - printf("\n"); - printf("Pointer of B in main = %p\n", B.pointer()); - for (int i = 0; i < N; i++) { - printf("Value of B(%d) in main = %d\n", i, B(i)); - } - - return 0; + return 0; } diff --git a/examples/virtualFcnKokkos/child.cpp b/examples/virtualFcnKokkos/child.cpp index d5ec55e6..6c99f0ff 100644 --- a/examples/virtualFcnKokkos/child.cpp +++ b/examples/virtualFcnKokkos/child.cpp @@ -7,45 +7,41 @@ #include #include "child.hpp" - - -//---------------------------- - KOKKOS_FUNCTION - child_variables::child_variables() {}; - -//---------------------------- - KOKKOS_FUNCTION - child_models::child_models() {}; - - KOKKOS_FUNCTION - baby1::baby1(double glitter, double food){ - this_glitter = glitter; - this_food = food; - } - - KOKKOS_FUNCTION - double baby1::math(double jump, double bounce){ - return this_glitter + this_food; - } - - KOKKOS_FUNCTION - baby2::baby2() { - this_glitter = 0.0; - this_food = 0.0; - } - - KOKKOS_FUNCTION - double baby2::math(double jump, double bounce){ - double sum = jump * bounce; - return sum; - } - - - - - - - - - - +// ---------------------------- +KOKKOS_FUNCTION +child_variables::child_variables() +{ +}; + +// ---------------------------- +KOKKOS_FUNCTION +child_models::child_models() +{ +}; + +KOKKOS_FUNCTION +baby1::baby1(double glitter, double food) +{ + this_glitter = glitter; + this_food = food; +} + +KOKKOS_FUNCTION +double baby1::math(double jump, double bounce) +{ + return this_glitter + this_food; +} + +KOKKOS_FUNCTION +baby2::baby2() +{ + this_glitter = 0.0; + this_food = 0.0; +} + +KOKKOS_FUNCTION +double baby2::math(double jump, double bounce) +{ + double sum = jump * bounce; + return sum; +} diff --git a/examples/virtualFcnKokkos/inherited_inits.cpp b/examples/virtualFcnKokkos/inherited_inits.cpp index 89fbcb1e..932ddc60 100644 --- a/examples/virtualFcnKokkos/inherited_inits.cpp +++ b/examples/virtualFcnKokkos/inherited_inits.cpp @@ -1,53 +1,73 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include "inherited_inits.hpp" -void AllocateHost(ParentHost1D h_parent, u_int idx, size_t size) +void AllocateHost(ParentHost1D h_parent, u_int idx, size_t size) { - - h_parent(idx).child = (child_models *) kmalloc(size); - + h_parent(idx).child = (child_models*) kmalloc(size); } -void FreeHost(ParentHost1D h_parent) +void FreeHost(ParentHost1D h_parent) { - for (int mem = 0; mem < h_parent.extent(0); mem++) { kfree(h_parent(mem).child); } - } -void InitChildModels(Parent1D parent, u_int idx, baby2 baby2_inp) +void InitChildModels(Parent1D parent, u_int idx, baby2 baby2_inp) { - Kokkos::parallel_for( "CreateObjects", 1, KOKKOS_LAMBDA(const int&) { - //CreateChildObjects(parent, baby2_inp, idx); - new ((baby2 *)parent(idx).child) baby2{baby2_inp}; + // CreateChildObjects(parent, baby2_inp, idx); + new ((baby2*)parent(idx).child) baby2{ baby2_inp }; }); - } - -void InitChildModels(Parent1D parent, u_int idx, baby1 baby1_inp) +void InitChildModels(Parent1D parent, u_int idx, baby1 baby1_inp) { - Kokkos::parallel_for( "CreateObjects", 1, KOKKOS_LAMBDA(const int&) { - //CreateChildObjects(parent, baby1_inp, idx); - new ((baby1 *)parent(idx).child) baby1{baby1_inp}; + // CreateChildObjects(parent, baby1_inp, idx); + new ((baby1*)parent(idx).child) baby1{ baby1_inp }; }); - } - void ClearDeviceModels(Parent1D parent) { - Kokkos::parallel_for( "DestroyObjects", 1, KOKKOS_LAMBDA(const int&) { parent(0).child->~child_models(); parent(1).child->~child_models(); }); - } - diff --git a/examples/virtualFcnKokkos/kokkos_alias.h b/examples/virtualFcnKokkos/kokkos_alias.h index b952291f..e0e09ae9 100644 --- a/examples/virtualFcnKokkos/kokkos_alias.h +++ b/examples/virtualFcnKokkos/kokkos_alias.h @@ -1,16 +1,49 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #ifndef KOKKOS_ALIAS_H #define KOKKOS_ALIAS_H -#include +#include #include "parents.h" #include #include "matar.h" -//MACROS to make the code less scary -//#define kmalloc(size) ( Kokkos::kokkos_malloc(size) ) -//#define kfree(pnt) ( Kokkos::kokkos_free(pnt) ) -//#define ProfileRegionStart ( Kokkos::Profiling::pushRegion ) -//#define ProfileRegionEnd ( Kokkos::Profiling::popRegion ) +// MACROS to make the code less scary +// #define kmalloc(size) ( Kokkos::kokkos_malloc(size) ) +// #define kfree(pnt) ( Kokkos::kokkos_free(pnt) ) +// #define ProfileRegionStart ( Kokkos::Profiling::pushRegion ) +// #define ProfileRegionEnd ( Kokkos::Profiling::popRegion ) using real_t = double; using u_int = unsigned int; @@ -40,26 +73,26 @@ using TeamPolicy = Kokkos::TeamPolicy; using mdrange_policy2 = Kokkos::MDRangePolicy>; using mdrange_policy3 = Kokkos::MDRangePolicy>; -using RMatrix1D = Kokkos::View; -using RMatrix2D = Kokkos::View; -using RMatrix3D = Kokkos::View; -using RMatrix4D = Kokkos::View; -using RMatrix5D = Kokkos::View; -using IMatrix1D = Kokkos::View; -using IMatrix2D = Kokkos::View; -using IMatrix3D = Kokkos::View; -using IMatrix4D = Kokkos::View; -using IMatrix5D = Kokkos::View; -using SVar = Kokkos::View; -using SArray1D = Kokkos::View; -using SArray2D = Kokkos::View; -using SArray3D = Kokkos::View; -using SArray4D = Kokkos::View; -using SArray5D = Kokkos::View; +using RMatrix1D = Kokkos::View; +using RMatrix2D = Kokkos::View; +using RMatrix3D = Kokkos::View; +using RMatrix4D = Kokkos::View; +using RMatrix5D = Kokkos::View; +using IMatrix1D = Kokkos::View; +using IMatrix2D = Kokkos::View; +using IMatrix3D = Kokkos::View; +using IMatrix4D = Kokkos::View; +using IMatrix5D = Kokkos::View; +using SVar = Kokkos::View; +using SArray1D = Kokkos::View; +using SArray2D = Kokkos::View; +using SArray3D = Kokkos::View; +using SArray4D = Kokkos::View; +using SArray5D = Kokkos::View; -using SHArray1D = Kokkos::View; +using SHArray1D = Kokkos::View; -using Parent1D = Kokkos::View; -using ParentHost1D = Kokkos::View; +using Parent1D = Kokkos::View; +using ParentHost1D = Kokkos::View; #endif diff --git a/examples/virtualFcnKokkos/main_kokkos_vfcn.cpp b/examples/virtualFcnKokkos/main_kokkos_vfcn.cpp index 5ef05a6a..a1959524 100644 --- a/examples/virtualFcnKokkos/main_kokkos_vfcn.cpp +++ b/examples/virtualFcnKokkos/main_kokkos_vfcn.cpp @@ -1,50 +1,76 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include "inherited_inits.hpp" -int main() { - - +int main() +{ // Kokkos GPU test - + Kokkos::initialize(); { - - - int num_parent = 2; // number of materials - Parent1D parent("parent", num_parent); // Initialize Kokkos View on the GPU of type material, size num_parent - auto h_parent = Kokkos::create_mirror_view(parent); // Create a host view of the Kokkos View + int num_parent = 2; // number of materials + Parent1D parent("parent", num_parent); // Initialize Kokkos View on the GPU of type material, size num_parent + auto h_parent = Kokkos::create_mirror_view(parent); // Create a host view of the Kokkos View + AllocateHost(h_parent, 0, BABY2_SIZE); // Function performed on Host to do raw Kokkos allocation of baby2 GPU space inside of Host data structure + AllocateHost(h_parent, 1, BABY1_SIZE); // Function performed on Host to do raw Kokkos allocation of baby1 GPU space inside of Host data structure - AllocateHost(h_parent, 0, BABY2_SIZE); // Function performed on Host to do raw Kokkos allocation of baby2 GPU space inside of Host data structure - AllocateHost(h_parent, 1, BABY1_SIZE); // Function performed on Host to do raw Kokkos allocation of baby1 GPU space inside of Host data structure + Kokkos::deep_copy(parent, h_parent); // deep copy Host data (allocated above) to the GPU Kokkos View. GPU View now has the class space allocated - Kokkos::deep_copy(parent, h_parent); // deep copy Host data (allocated above) to the GPU Kokkos View. GPU View now has the class space allocated + InitChildModels(parent, 0, baby2{}); // Kokkos Function to create new instances of the baby2 model on the GPU + InitChildModels(parent, 1, baby1{ 1.4, 1.0 }); // Kokkos Function to create new instances of the baby1 models on the GPU - InitChildModels(parent, 0, baby2{}); // Kokkos Function to create new instances of the baby2 model on the GPU - InitChildModels(parent, 1, baby1{1.4,1.0}); // Kokkos Function to create new instances of the baby1 models on the GPU - - // Model test, also shows a Kokkos reduction - double value_1; - Kokkos::parallel_reduce( - "CheckValues", - num_parent, - KOKKOS_LAMBDA(const int idx, real_t &lsum) + // Model test, also shows a Kokkos reduction + double value_1; + Kokkos::parallel_reduce( + "CheckValues", + num_parent, + KOKKOS_LAMBDA(const int idx, real_t & lsum) { lsum += parent(idx).child->math(2.0, 4.0); } - , value_1); - - printf("value %f\n", value_1); + , value_1); - ClearDeviceModels(parent); // Kokkos Function to call deconstructors of objects on the GPU + printf("value %f\n", value_1); - FreeHost(h_parent); // Function performed on Host to free the allocated GPU classes inside of the Host mirror + ClearDeviceModels(parent); // Kokkos Function to call deconstructors of objects on the GPU - + FreeHost(h_parent); // Function performed on Host to free the allocated GPU classes inside of the Host mirror } Kokkos::finalize(); - printf("--- finished ---\n"); return 0; diff --git a/examples/virtualFcnKokkos/parents.h b/examples/virtualFcnKokkos/parents.h index ea95d489..e8d33d22 100644 --- a/examples/virtualFcnKokkos/parents.h +++ b/examples/virtualFcnKokkos/parents.h @@ -1,56 +1,46 @@ // // parent.h -// +// // // #ifndef PARENT_H #define PARENT_H #include "child.hpp" +class parent_variables +{ +public: + // child_variables *child_var; + int num_pnts; + int type; -class parent_variables{ - - public: - //child_variables *child_var; - int num_pnts; - int type; - - // child variables - double *child_p; // var - - - // variables - double *var_fake1; + // child variables + double* child_p; // var + // variables + double* var_fake1; // ... - - // default constructor - KOKKOS_FUNCTION - parent_variables() {}; + // default constructor + KOKKOS_FUNCTION + parent_variables() {}; - - // deconstructor - KOKKOS_FUNCTION - ~parent_variables(){}; - + // deconstructor + KOKKOS_FUNCTION + ~parent_variables() {}; }; -class parent_models{ - - public: - child_models *child; +class parent_models +{ +public: + child_models* child; // ... - - + // deconstructor - KOKKOS_FUNCTION - ~parent_models(){}; - - + KOKKOS_FUNCTION + ~parent_models() {}; }; // end of parent - #endif diff --git a/examples/virtualFcnMATAR/classes.cpp b/examples/virtualFcnMATAR/classes.cpp index 32da1a5d..a5bae74b 100644 --- a/examples/virtualFcnMATAR/classes.cpp +++ b/examples/virtualFcnMATAR/classes.cpp @@ -1,35 +1,81 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include "classes.hpp" /* Shape */ KOKKOS_FUNCTION -Shape::Shape() {} +Shape::Shape() +{ +} KOKKOS_FUNCTION -Shape::~Shape() {} - +Shape::~Shape() +{ +} /* Circle */ KOKKOS_FUNCTION -Circle::Circle(double r) : radius(r) {} +Circle::Circle(double r) : radius(r) +{ +} KOKKOS_FUNCTION -Circle::~Circle() {} +Circle::~Circle() +{ +} KOKKOS_FUNCTION -double Circle::area() { - double result = atan(1)*4 * radius * radius; - return result; +double Circle::area() +{ + double result = atan(1) * 4 * radius * radius; + return result; } /* Square */ KOKKOS_FUNCTION -Square::Square(double l) : length(l) {} +Square::Square(double l) : length(l) +{ +} KOKKOS_FUNCTION -Square::~Square() {} +Square::~Square() +{ +} KOKKOS_FUNCTION -double Square::area() { - double result = length * length; - return result; +double Square::area() +{ + double result = length * length; + return result; } diff --git a/examples/virtualFcnMATAR/main.cpp b/examples/virtualFcnMATAR/main.cpp index 247d93d5..31ef8e5d 100644 --- a/examples/virtualFcnMATAR/main.cpp +++ b/examples/virtualFcnMATAR/main.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include "matar.h" @@ -11,83 +44,84 @@ using namespace mtr; // Pointer wrapper, because kokkos does not like pointers as template args -struct ShapePtr{ - Shape *shape; +struct ShapePtr +{ + Shape* shape; }; -int main(int argc, char* argv[]) { - Kokkos::initialize(argc, argv); - { // kokkos scope +int main(int argc, char* argv[]) +{ + Kokkos::initialize(argc, argv); + { // kokkos scope + const size_t num_shapes = 4; + DCArrayKokkos shape_array(num_shapes); - const size_t num_shapes = 4; - DCArrayKokkos shape_array(num_shapes); - - // Allocate memory on GPU for shapes. Even=Circle, Odd=Square - for (size_t i = 0; i < num_shapes; i++) { - if (i % 2 == 0) { - shape_array.host(i).shape = (Circle*)Kokkos::kokkos_malloc(sizeof(Circle)); - } else { - shape_array.host(i).shape = (Square*)Kokkos::kokkos_malloc(sizeof(Square)); - } - } - // Update device side of array of memory location on GPU - shape_array.update_device(); + // Allocate memory on GPU for shapes. Even=Circle, Odd=Square + for (size_t i = 0; i < num_shapes; i++) { + if (i % 2 == 0) { + shape_array.host(i).shape = (Circle*)Kokkos::kokkos_malloc(sizeof(Circle)); + } + else { + shape_array.host(i).shape = (Square*)Kokkos::kokkos_malloc(sizeof(Square)); + } + } + // Update device side of array of memory location on GPU + shape_array.update_device(); - // Create shapes using `placement new`. Even=Circle, Odd=Square. Radius=i, Length=i. - FOR_ALL(i, 0, num_shapes, { - if (i % 2 == 0){ - new ((Circle*)shape_array(i).shape) Circle(i); - } else { - new ((Square*)shape_array(i).shape) Square(i); - } - }); - Kokkos::fence(); + // Create shapes using `placement new`. Even=Circle, Odd=Square. Radius=i, Length=i. + FOR_ALL(i, 0, num_shapes, { + if (i % 2 == 0) { + new ((Circle*)shape_array(i).shape) Circle(i); + } + else { + new ((Square*)shape_array(i).shape) Square(i); + } + }); + Kokkos::fence(); - // Calculate Area - DCArrayKokkos area_array(num_shapes); - FOR_ALL(i, 0, num_shapes, { - area_array(i) = shape_array(i).shape->area(); - }); - Kokkos::fence(); - area_array.update_host(); + // Calculate Area + DCArrayKokkos area_array(num_shapes); + FOR_ALL(i, 0, num_shapes, { + area_array(i) = shape_array(i).shape->area(); + }); + Kokkos::fence(); + area_array.update_host(); - // Check result - for (size_t i = 0; i < num_shapes; i++) { - double area; - if (i % 2 == 0) { - area = atan(1)*4 * i * i; - if (area != area_array.host(i)) { - printf("Circle radius=%.3f, calc_area=%.3f, actual_area=%.3f\n", i, area_array.host(i), area); - } - } else { - area = i * i; - if (area != area_array.host(i)) { - printf("Square length=%.3f, calc_area=%.3f, actual_area=%.3f\n", i, area_array.host(i), area); - } - } - - if (area != area_array.host(i)) { - throw std::runtime_error("calculated area NOT EQUAL actual area"); - } - } + // Check result + for (size_t i = 0; i < num_shapes; i++) { + double area; + if (i % 2 == 0) { + area = atan(1) * 4 * i * i; + if (area != area_array.host(i)) { + printf("Circle radius=%.3f, calc_area=%.3f, actual_area=%.3f\n", i, area_array.host(i), area); + } + } + else { + area = i * i; + if (area != area_array.host(i)) { + printf("Square length=%.3f, calc_area=%.3f, actual_area=%.3f\n", i, area_array.host(i), area); + } + } - // Destroy shapes - FOR_ALL(i, 0, num_shapes, { - shape_array(i).shape->~Shape(); - }); - Kokkos::fence(); + if (area != area_array.host(i)) { + throw std::runtime_error("calculated area NOT EQUAL actual area"); + } + } - // Free GPU memory - for (size_t i = 0; i < num_shapes; i++) { - Kokkos::kokkos_free(shape_array.host(i).shape); - } + // Destroy shapes + FOR_ALL(i, 0, num_shapes, { + shape_array(i).shape->~Shape(); + }); + Kokkos::fence(); - printf("COMPLETED SUCCESSFULLY!!!\n"); + // Free GPU memory + for (size_t i = 0; i < num_shapes; i++) { + Kokkos::kokkos_free(shape_array.host(i).shape); + } - } // end kokkos scope - Kokkos::finalize(); + printf("COMPLETED SUCCESSFULLY!!!\n"); + } // end kokkos scope + Kokkos::finalize(); - return 0; + return 0; } - - diff --git a/examples/watt-graph/floyd.cpp b/examples/watt-graph/floyd.cpp index 9918fd9b..c7a5dd39 100644 --- a/examples/watt-graph/floyd.cpp +++ b/examples/watt-graph/floyd.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -7,16 +40,18 @@ using namespace mtr; // matar namespace -//Helper function to prefill a graph, seems generically useful -void graphFiller(int n, int diag, int off_diag, CArray &G){ - int i,j; - for(i = 0; i < n; i++){ - for(j =0; j < n; j++){ - if(i == j){ - G(i,j) = diag; - } else { - G(i,j) = off_diag; - } +// Helper function to prefill a graph, seems generically useful +void graphFiller(int n, int diag, int off_diag, CArray& G) +{ + int i, j; + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + if (i == j) { + G(i, j) = diag; + } + else { + G(i, j) = off_diag; + } } } } @@ -25,120 +60,124 @@ void graphFiller(int n, int diag, int off_diag, CArray &G){ // n := size of graph // p := rewire prob. should be 0 <= p <= 1 // G := an empty matrix to place the edges. -// A watts storgatz graph is one where each node is connected to it's k nearest neighbors but each edge has a small -// rewiring chance. This is to show that a graph with mostly local connections can have short average shortest distances -void wattsStorgatzGraph(int k, int n, double p, CArray &G){ - int idx_forw, idx_back, random_edge, random_idx; - double coin_flip; - for(int i = 0; i < n; i++){ - for(int j = 1 ; j <= k; j++){ - idx_forw = (i + j) % n; - idx_back = (i + n - j) % n; - coin_flip = ((double) std::rand())/RAND_MAX; - if(coin_flip < p){ - // ramdom number from (k, n-k) - random_edge = rand() % (n - 2*k) + (k); +// A watts storgatz graph is one where each node is connected to it's k nearest neighbors but each edge has a small +// rewiring chance. This is to show that a graph with mostly local connections can have short average shortest distances +void wattsStorgatzGraph(int k, int n, double p, CArray& G) +{ + int idx_forw, idx_back, random_edge, random_idx; + double coin_flip; + for (int i = 0; i < n; i++) { + for (int j = 1 ; j <= k; j++) { + idx_forw = (i + j) % n; + idx_back = (i + n - j) % n; + coin_flip = ((double) std::rand()) / RAND_MAX; + if (coin_flip < p) { + // ramdom number from (k, n-k) + random_edge = rand() % (n - 2 * k) + (k); // index is i + the above offset - random_idx = (i + random_edge) % n; + random_idx = (i + random_edge) % n; G(i, random_idx) = 1; - }else{ - G(i,idx_forw) = 1; - } - G(i,idx_back) = 1; - } - } + } + else{ + G(i, idx_forw) = 1; + } + G(i, idx_back) = 1; + } + } } - -void floydW(CArray G, CArray &res, int n_nodes){ - int i,j,k; - graphFiller(n_nodes, 0, INT_MAX, res); - for(i = 0; i G, CArray& res, int n_nodes) +{ + int i, j, k; + graphFiller(n_nodes, 0, INT_MAX, res); + for (i = 0; i < n_nodes; i++) { + for (j = 0; j < n_nodes; j++) { + if (G(i, j) == 1) { + res(i, j) = 1; + } + } + } + for (k = 0; k < n_nodes; k++) { + for (i = 0; i < n_nodes; i++) { + for (j = 0; j < n_nodes ; j++) { + if (i == j) { + continue; + } + int dist1 = res(i, k) + res(k, j); + int dist2 = res(i, j); + if (dist1 < 0) { dist1 = INT_MAX; } - if(dist2 < 0){ + if (dist2 < 0) { dist2 = INT_MAX; } - if(dist1 < dist2){ - res(i,j) = dist1; - } - } - } - } + if (dist1 < dist2) { + res(i, j) = dist1; + } + } + } + } } -double averageDistance(CArray G, int n){ - int i,j; +double averageDistance(CArray G, int n) +{ + int i, j; double total = 0.0; - for(i = 0; i n){ + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + if (G(i, j) > n) { printf("Ohh dear, this shouldn't happen\n"); } - total += ((double) G(i,j)) / n; + total += ((double) G(i, j)) / n; } } - return total / ( n ); + return total / (n); } -void printer(CArray G, int n){ - int i,j; - for(i = 0; i< n; i++){ - for(j = 0; j G, int n) +{ + int i, j; + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + printf("%d ", G(i, j)); } printf("\n"); } } - -int main(int argc, char** argv){ - int node_size = 4000; - double rewire_p = 0.0; - int k_nearest = 6; - if((argc > 4) || (argc == 1)){ +int main(int argc, char** argv) +{ + int node_size = 4000; + double rewire_p = 0.0; + int k_nearest = 6; + if ((argc > 4) || (argc == 1)) { printf("Usage is ./test_kokkoks_floyd \n"); - printf("Using default values: [number of nodes: %d] [rewire_prob : %.2f] [k_nearest : %d]\n", node_size, rewire_p, k_nearest); - } else { - node_size = atoi(argv[1]); - rewire_p = atof(argv[2]); - k_nearest = atoi(argv[3]); + printf("Using default values: [number of nodes: %d] [rewire_prob : %.2f] [k_nearest : %d]\n", node_size, rewire_p, k_nearest); + } + else { + node_size = atoi(argv[1]); + rewire_p = atof(argv[2]); + k_nearest = atoi(argv[3]); } printf("%d, %.3f, %d,", node_size, rewire_p, k_nearest); - auto start = std::chrono::high_resolution_clock::now(); // start clock - auto G = CArray(node_size, node_size); - auto results = CArray (node_size, node_size); + auto start = std::chrono::high_resolution_clock::now(); // start clock + auto G = CArray(node_size, node_size); + auto results = CArray(node_size, node_size); wattsStorgatzGraph(k_nearest, node_size, rewire_p, G); - auto lap = std::chrono::high_resolution_clock::now(); // start clock - auto elapsed = std::chrono::duration_cast(lap-start); + auto lap = std::chrono::high_resolution_clock::now(); // start clock + auto elapsed = std::chrono::duration_cast(lap - start); printf("%.2f,", elapsed.count() * 1e-9); floydW(G, results, node_size); auto lap2 = std::chrono::high_resolution_clock::now(); // start clock - elapsed = std::chrono::duration_cast(lap2-lap); + elapsed = std::chrono::duration_cast(lap2 - lap); printf("%.2f,", elapsed.count() * 1e-9); double average_steps = averageDistance(results, node_size); - auto lap3 = std::chrono::high_resolution_clock::now(); // start clock - elapsed = std::chrono::duration_cast(lap3-lap2); - auto elapsed2 = std::chrono::duration_cast(lap3-start); + auto lap3 = std::chrono::high_resolution_clock::now(); // start clock + elapsed = std::chrono::duration_cast(lap3 - lap2); + auto elapsed2 = std::chrono::duration_cast(lap3 - start); printf("%.2f, %.2f, ", elapsed.count() * 1e-9, elapsed2.count() * 1e-9); - printf("%f\n", average_steps); - + printf("%f\n", average_steps); } diff --git a/examples/watt-graph/kokkos_floyd.cpp b/examples/watt-graph/kokkos_floyd.cpp index ff4bdd5b..16f8d409 100644 --- a/examples/watt-graph/kokkos_floyd.cpp +++ b/examples/watt-graph/kokkos_floyd.cpp @@ -1,3 +1,36 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ #include #include #include @@ -11,137 +44,138 @@ using namespace mtr; // matar namespace using gen_t = Kokkos::Random_XorShift64_Pool; - // k := connect to k nearest neighbors // n := size of graph // p := rewire prob. should be 0 <= p <= 1 // G := an empty matrix to place the edges. -// A watts storgatz graph is one where each node is connected to it's k nearest neighbors but each edge has a small -// rewiring chance. This is to show that a graph with mostly local connections can have short average shortest distances -void wattsStorgatzGraph(int k, int n, double p, CArrayKokkos &G){ - +// A watts storgatz graph is one where each node is connected to it's k nearest neighbors but each edge has a small +// rewiring chance. This is to show that a graph with mostly local connections can have short average shortest distances +void wattsStorgatzGraph(int k, int n, double p, CArrayKokkos& G) +{ gen_t rand_pool64(5374857); CArrayKokkos coins(n, k); - CArrayKokkos offsets(n, k); - FOR_ALL(i , 0, n, - j, 1, k+1, { - // Get a random number state from the pool for the active thread - gen_t::generator_type rand_gen = rand_pool64.get_state(); + CArrayKokkos offsets(n, k); + FOR_ALL(i, 0, n, + j, 1, k + 1, { + // Get a random number state from the pool for the active thread + gen_t::generator_type rand_gen = rand_pool64.get_state(); - // generate random numbers in the range (0,10] - coins(i, j-1) = ((double) rand_gen.urand64(10000)) / 10000.0; - offsets(i, j-1) = rand_gen.urand64(n - 2*k) + k; - }); + // generate random numbers in the range (0,10] + coins(i, j - 1) = ((double) rand_gen.urand64(10000)) / 10000.0; + offsets(i, j - 1) = rand_gen.urand64(n - 2 * k) + k; + }); Kokkos::fence(); FOR_ALL(i, 0, n, - j, 1, k+1,{ - // Give the state back, which will allow another thread to acquire it - int idx_forw = (i + j) % n; - int idx_back = (i + n - j) % n; - G(i,idx_back) = 1; - if(coins(i, j-1) < p ){ - int random_idx = offsets(i, j-1); - random_idx = (i + random_idx) % n; - G(i, random_idx) = 1; - } else { - G(i, idx_forw) = 1; - } - }); + j, 1, k + 1, { + // Give the state back, which will allow another thread to acquire it + int idx_forw = (i + j) % n; + int idx_back = (i + n - j) % n; + G(i, idx_back) = 1; + if (coins(i, j - 1) < p) { + int random_idx = offsets(i, j - 1); + random_idx = (i + random_idx) % n; + G(i, random_idx) = 1; + } + else { + G(i, idx_forw) = 1; + } + }); Kokkos::fence(); return; } - -void floydW(CArrayKokkos G, CArrayKokkos &res, int n_nodes){ - int k; +void floydW(CArrayKokkos G, CArrayKokkos& res, int n_nodes) +{ + int k; FOR_ALL(i, 0, n_nodes, j, 0, n_nodes, { - if(G(i,j) == 1){ - res(i,j) = 1; - } - }); - for(k = 0; k < n_nodes; k++){ - FOR_ALL(i, 0, n_nodes, + if (G(i, j) == 1) { + res(i, j) = 1; + } + }); + for (k = 0; k < n_nodes; k++) { + FOR_ALL(i, 0, n_nodes, j, 0, n_nodes, { - if(i != j){ - - float dist1 = res(i,k) + res(k,j); - float dist2 = res(i,j); - res(i,j) = (dist1 < dist2) ? dist1 : dist2; - } + if (i != j) { + float dist1 = res(i, k) + res(k, j); + float dist2 = res(i, j); + res(i, j) = (dist1 < dist2) ? dist1 : dist2; + } }); } } -double averageDistance(CArrayKokkos G, int n){ +double averageDistance(CArrayKokkos G, int n) +{ double total = 0; double loc_sum; REDUCE_SUM(i, 0, n, j, 0, n, loc_sum, { - loc_sum += ((double) G(i,j)) /n ; - }, total); - return total /(n ); + loc_sum += ((double) G(i, j)) / n; + }, total); + return total / (n); } -void printer(CArrayKokkos G, int n){ - int x = 0; - x++; - FOR_ALL(i, 0, n, - j, 0, n,{ - printf("%d, %d) : %d \n", i, j, G(i,j)); - }); +void printer(CArrayKokkos G, int n) +{ + int x = 0; + x++; + FOR_ALL(i, 0, n, + j, 0, n, { + printf("%d, %d) : %d \n", i, j, G(i, j)); + }); } - -int main(int argc, char** argv){ - int node_size = 4000; - double rewire_p = 0.0; - int k_nearest = 6; - if((argc > 4) || (argc == 1)){ +int main(int argc, char** argv) +{ + int node_size = 4000; + double rewire_p = 0.0; + int k_nearest = 6; + if ((argc > 4) || (argc == 1)) { printf("Usage is ./test_kokkoks_floyd \n"); - printf("Using default values: [number of nodes: %d] [rewire_prob : %.2f] [k_nearest : %d]\n", node_size, rewire_p, k_nearest); - } else { - node_size = atoi(argv[1]); - rewire_p = atof(argv[2]); - k_nearest = atoi(argv[3]); + printf("Using default values: [number of nodes: %d] [rewire_prob : %.2f] [k_nearest : %d]\n", node_size, rewire_p, k_nearest); + } + else { + node_size = atoi(argv[1]); + rewire_p = atof(argv[2]); + k_nearest = atoi(argv[3]); } printf("%d, %.5f, %d", node_size, rewire_p, k_nearest); Kokkos::initialize(); { - - auto start = std::chrono::high_resolution_clock::now(); // start clock - CArrayKokkos G(node_size, node_size); - CArrayKokkos results(node_size, node_size); - FOR_ALL(i, 0, node_size, - j, 0, node_size,{ - G(i,j) = 0; - if(i == j){ - results(i,j) = 0; - } else { - results(i,j) = std::numeric_limits::infinity(); + auto start = std::chrono::high_resolution_clock::now(); // start clock + CArrayKokkos G(node_size, node_size); + CArrayKokkos results(node_size, node_size); + FOR_ALL(i, 0, node_size, + j, 0, node_size, { + G(i, j) = 0; + if (i == j) { + results(i, j) = 0; + } + else { + results(i, j) = std::numeric_limits::infinity(); } }); - - wattsStorgatzGraph(k_nearest, node_size, rewire_p, G); - - auto lap = std::chrono::high_resolution_clock::now(); // start clock - - auto elapsed = std::chrono::duration_cast(lap-start); - printf(", %.2f,", elapsed.count() * 1e-9); - floydW(G, results, node_size); - - auto lap2 = std::chrono::high_resolution_clock::now(); // start clock - elapsed = std::chrono::duration_cast(lap2-lap); - printf("%.2f,", elapsed.count() * 1e-9); - double average_steps = averageDistance(results, node_size); - - auto lap3 = std::chrono::high_resolution_clock::now(); // start clock - elapsed = std::chrono::duration_cast(lap3-lap2); - auto elapsed2 = std::chrono::duration_cast(lap3-start); - printf("%.2f, %.2f, ", elapsed.count() * 1e-9, elapsed2.count() * 1e-9); - printf("%f\n", average_steps); - + wattsStorgatzGraph(k_nearest, node_size, rewire_p, G); + + auto lap = std::chrono::high_resolution_clock::now(); // start clock + + auto elapsed = std::chrono::duration_cast(lap - start); + printf(", %.2f,", elapsed.count() * 1e-9); + floydW(G, results, node_size); + + auto lap2 = std::chrono::high_resolution_clock::now(); // start clock + elapsed = std::chrono::duration_cast(lap2 - lap); + printf("%.2f,", elapsed.count() * 1e-9); + double average_steps = averageDistance(results, node_size); + + auto lap3 = std::chrono::high_resolution_clock::now(); // start clock + elapsed = std::chrono::duration_cast(lap3 - lap2); + auto elapsed2 = std::chrono::duration_cast(lap3 - start); + + printf("%.2f, %.2f, ", elapsed.count() * 1e-9, elapsed2.count() * 1e-9); + printf("%f\n", average_steps); } - Kokkos::finalize(); + Kokkos::finalize(); }