diff --git a/scripts/build-fierro.sh b/scripts/build-fierro.sh index 42b60d177..83a555bf8 100755 --- a/scripts/build-fierro.sh +++ b/scripts/build-fierro.sh @@ -7,6 +7,7 @@ show_help() { echo " --build_action=. Default is 'full-app'" echo " --machine=. Default is 'linux'" echo " --build_cores=. Default is set 1" + echo " --intel_mkl=. Default is 'disabled'" echo " --heffte_build_type=. Default is set 'fftw'" echo " --help: Display this help message" echo " " @@ -63,6 +64,7 @@ machine="linux" kokkos_build_type="serial" heffte_build_type="fftw" build_cores="1" +intel_mkl="disabled" # Define arrays of valid options valid_build_action=("full-app" "set-env" "install-trilinos" "install-hdf5" "install-heffte" "install-uncrustify" "fierro") @@ -70,6 +72,7 @@ valid_solver=("all" "explicit" "explicit-evpfft" "explicit-ls-evpfft" "implicit" valid_kokkos_build_types=("serial" "openmp" "pthreads" "cuda" "hip") valid_heffte_build_types=("fftw" "cufft" "rocfft") valid_machines=("darwin" "chicoma" "linux" "mac" "msu") +valid_intel_mkl=("disabled" "enabled") # Parse command line arguments for arg in "$@"; do @@ -114,6 +117,16 @@ for arg in "$@"; do return 1 fi ;; + --intel_mkl=*) + option="${arg#*=}" + if [[ " ${valid_intel_mkl[*]} " == *" $option "* ]]; then + intel_mkl="$option" + else + echo "Error: Invalid --intel_mkl specified." + show_help + return 1 + fi + ;; --heffte_build_type=*) option="${arg#*=}" if [[ " ${valid_heffte_build_types[*]} " == *" $option "* ]]; then @@ -170,6 +183,7 @@ echo "Building based on these argument options:" echo "Build action - ${build_action}" echo "Solver - ${solver}" echo "Kokkos backend - ${kokkos_build_type}" +echo "Intel MKL library - ${intel_mkl}" echo "Machine - ${machine}" if [ "${solver}" = "explicit-evpfft" ] || [ "${solver}" = "explicit-ls-evpfft" ]; then echo "HEFFTE - ${heffte_build_type}" @@ -184,14 +198,14 @@ source setup-env.sh ${machine} ${kokkos_build_type} ${build_cores} # Next, do action based on args if [ "$build_action" = "full-app" ]; then source uncrustify-install.sh - source trilinos-install.sh ${kokkos_build_type} ${machine} + source trilinos-install.sh ${kokkos_build_type} ${machine} ${intel_mkl} if [ "$solver" = "explicit-evpfft" ] || [ "${solver}" = "explicit-ls-evpfft" ]; then source hdf5-install.sh source heffte-install.sh ${heffte_build_type} ${machine} fi source cmake_build.sh ${solver} ${heffte_build_type} ${kokkos_build_type} elif [ "$build_action" = "install-trilinos" ]; then - source trilinos-install.sh ${kokkos_build_type} ${machine} + source trilinos-install.sh ${kokkos_build_type} ${machine} ${intel_mkl} elif [ "$build_action" = "install-hdf5" ]; then source hdf5-install.sh elif [ "$build_action" = "install-heffte" ]; then diff --git a/scripts/cmake_build.sh b/scripts/cmake_build.sh index 55c7f49c2..dd6cd480e 100644 --- a/scripts/cmake_build.sh +++ b/scripts/cmake_build.sh @@ -12,7 +12,7 @@ kokkos_build_type="${3}" if { [ ! -d "${ELEMENTS_SOURCE_DIR}/elements" ] || [ ! -d "${ELEMENTS_SOURCE_DIR}/matar/src" ] ;} then echo "Missing submodules, downloading them...." - git submodule update --recursive "${ELEMENTS_SOURCE_DIR}" + git submodule update --init --recursive "${ELEMENTS_SOURCE_DIR}" fi if [ ! -d "${TRILINOS_INSTALL_DIR}/lib" ]; then diff --git a/scripts/trilinos-install.sh b/scripts/trilinos-install.sh index 9c7aad598..7bc7e6768 100644 --- a/scripts/trilinos-install.sh +++ b/scripts/trilinos-install.sh @@ -2,6 +2,7 @@ kokkos_build_type="${1}" machine="${2}" +intel_mkl="${3}" # If all arguments are valid, you can use them in your script as needed echo "Trilinos Kokkos Build Type: $kokkos_build_type" @@ -28,7 +29,9 @@ fi #check if Trilinos library files were installed, install them otherwise. [ -d "${TRILINOS_BUILD_DIR}/lib" ] && echo "Directory ${TRILINOS_BUILD_DIR}/lib exists, assuming successful installation; delete build folder and run build script again if there was an environment error that has been corrected." -if [ ! -d "${TRILINOS_BUILD_DIR}/lib" ] +[ -d "${TRILINOS_BUILD_DIR}/lib64" ] && echo "Directory ${TRILINOS_BUILD_DIR}/lib64 exists, assuming successful installation; delete build folder and run build script again if there was an environment error that has been corrected." + +if [ ! -d "${TRILINOS_BUILD_DIR}/lib" ] && [ ! -d "${TRILINOS_BUILD_DIR}/lib64" ] then echo "Directory Trilinos/build/lib does not exist, compiling Trilinos (this might take a while)...." @@ -113,6 +116,26 @@ ${ADDITIONS[@]} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR} ) +# Flags for building with Intel MKL library +INTEL_MKL_ADDITIONS=( +-D TPL_ENABLE_MKL=ON +-D BLAS_LIBRARY_NAMES="libmkl_rt.so" +-D BLAS_LIBRARY_DIRS="$MKLROOT/lib/intel64" +-D LAPACK_LIBRARY_NAMES="libmkl_rt.so" +-D LAPACK_LIBRARY_DIRS="$MKLROOT/lib/intel64" +-D MKL_LIBRARY_DIRS="$MKLROOT/lib/intel64" +-D MKL_LIBRARY_NAMES="mkl_rt" +-D MKL_INCLUDE_DIRS="$MKLROOT/include" +) + +echo "**** Intel MKL = ${intel_mkl} ****" +if [ "$intel_mkl" = "enabled" ]; then + echo "**** assuming MKL installation at $MKLROOT ****" + cmake_options+=( + ${INTEL_MKL_ADDITIONS[@]} + ) +fi + if [ "$kokkos_build_type" = "openmp" ]; then cmake_options+=( ${OPENMP_ADDITIONS[@]} diff --git a/scripts/uncrustify-install.sh b/scripts/uncrustify-install.sh index b0cf694b5..49312555d 100644 --- a/scripts/uncrustify-install.sh +++ b/scripts/uncrustify-install.sh @@ -2,10 +2,10 @@ # Check if the uncrustify build directory exists and is not empty in the parent directory; if not, clone it -if [ ! -d "${UNCRUSTIFY_SOURCE_DIR}" ]; +if [ ! -d "${UNCRUSTIFY_SOURCE_DIR}/src" ]; then echo "Missing Uncrustify submodule, downloading...." - git submodule update ${UNCRUSTIFY_SOURCE_DIR} + git submodule update --init ${UNCRUSTIFY_SOURCE_DIR} fi if [ ! -f "${UNCRUSTIFY_BUILD_DIR}/uncrustify" ]; then diff --git a/src/Parallel-Solvers/Implicit-Lagrange/Implicit_Solver.cpp b/src/Parallel-Solvers/Implicit-Lagrange/Implicit_Solver.cpp index e8fad7eea..72d7cad72 100644 --- a/src/Parallel-Solvers/Implicit-Lagrange/Implicit_Solver.cpp +++ b/src/Parallel-Solvers/Implicit-Lagrange/Implicit_Solver.cpp @@ -186,8 +186,9 @@ void Implicit_Solver::run(){ all_initial_node_coords_distributed = all_node_coords_distributed; //print element imbalance stats - int rnum_global_sum = 0; - MPI_Allreduce(&rnum_elem, &rnum_global_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + long long int rnum_global_sum = 0; + long long int temp_rnum_elem = rnum_elem; + MPI_Allreduce(&temp_rnum_elem, &rnum_global_sum, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); double local_imbalance, max_imbalance, avg_elem; max_imbalance = 0; avg_elem = rnum_global_sum/((double) nranks); diff --git a/src/Parallel-Solvers/Optimization_Modules/Fierro_Optimization_Objective.hpp b/src/Parallel-Solvers/Optimization_Modules/Fierro_Optimization_Objective.hpp new file mode 100644 index 000000000..d765f13c5 --- /dev/null +++ b/src/Parallel-Solvers/Optimization_Modules/Fierro_Optimization_Objective.hpp @@ -0,0 +1,133 @@ +// @HEADER +// ************************************************************************ +// +// Rapid Optimization Library (ROL) Package +// Copyright (2014) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact lead developers: +// Drew Kouri (dpkouri@sandia.gov) and +// Denis Ridzal (dridzal@sandia.gov) +// +// ************************************************************************ +// @HEADER + +#ifndef FIERRO_OPTIMIZATION_OBJECTIVE_H +#define FIERRO_OPTIMIZATION_OBJECTIVE_H + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "Tpetra_Details_DefaultTypes.hpp" + +#include "ROL_Types.hpp" +#include +#include "ROL_Elementwise_Reduce.hpp" +#include "ROL_Objective.hpp" +#include "ROL_BoundConstraint.hpp" + +/** @ingroup func_group + \class ROL::ObjectiveMMA + \brief Provides the interface to to Method of Moving Asymptotes + Objective function + + --- +*/ + +class FierroOptimizationObjective : public ROL::Objective { + + typedef Tpetra::Map<>::local_ordinal_type LO; + typedef Tpetra::Map<>::global_ordinal_type GO; + typedef Tpetra::Map<>::node_type Node; + typedef Tpetra::Map Map; + typedef Tpetra::MultiVector MV; + typedef ROL::Vector V; + typedef const ROL::Vector const_V; + typedef ROL::TpetraMultiVector ROL_MV; + + using traits = Kokkos::ViewTraits; + using array_layout = typename traits::array_layout; + using execution_space = typename traits::execution_space; + using device_type = typename traits::device_type; + using memory_traits = typename traits::memory_traits; + using global_size_t = Tpetra::global_size_t; + + typedef Kokkos::View values_array; + typedef Kokkos::View global_indices_array; + typedef Kokkos::View indices_array; + + //typedef Kokkos::DualView::t_dev vec_array; + typedef MV::dual_view_type::t_dev vec_array; + typedef MV::dual_view_type::t_host host_vec_array; + typedef Kokkos::View const_host_vec_array; + typedef MV::dual_view_type dual_vec_array; + typedef ROL::Objective OBJ; + typedef ROL::BoundConstraint BND; + int update_count = 0; + +private: + + real_t fval_; // Original objective value + + real_t tol_; + + ROL::Ptr getVector( const V& x ) { + return dynamic_cast(x).getVector(); + } + + ROL::Ptr getVector( V& x ) { + return dynamic_cast(x).getVector(); + } + +public: + + bool time_accumulation; + real_t objective_accumulation; + + FierroOptimizationObjective(){ + objective_accumulation = 0; + time_accumulation = false; + } + + +}; // class ObjectiveMMA + + +#endif // FIERRO_MMA_OBJECTIVE + diff --git a/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_implosion.yaml b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_implosion.yaml new file mode 100644 index 000000000..ea43b3273 --- /dev/null +++ b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_implosion.yaml @@ -0,0 +1,84 @@ +num_dims: 3 +dynamic_options: + time_final: 20.0 + dt_min: 1.e-8 + dt_max: 1.e-2 + dt_start: 1.e-5 + cycle_stop: 2000000 + +mesh_generation_options: + type: Box + origin: [0, 0, 0] + length: [1.2, 1.2, 1.2] + num_elems: [32, 32, 32] + +output_options: + timer_output_level: thorough + output_file_format: vtk + graphics_step: 0.25 + +fea_module_parameters: + - type: SGH + material_id: 0 + boundary_conditions: + # Tag X plane + - surface: + type: x_plane + plane_position: 0.0 + type: reflected + + # Tag Y plane + - surface: + type: y_plane + plane_position: 0.0 + type: reflected + + # Tag Z plane + - surface: + type: z_plane + plane_position: 0.0 + type: reflected + + loading_conditions: + # Load radially around 0 0 0 corner + - volume: + type: sphere + radius1: 1.0 + radius2: 2.5 + type: body_force + component_x: -0.00000005 + component_y: -0.00000005 + component_z: -0.00000005 + +materials: + - id: 0 + eos_model: ideal_gas + strength_model: none + elastic_modulus: 10 + poisson_ratio: 0.3 + maximum_limiter: 1 + q1: 2.0 + q2: 4.0 + q1ex: 2.0 + q2ex: 4.0 + eos_global_vars: + - 1.666666666666667 + - 1.0E-14 + - 1.0 + +regions: + - volume: + type: global + material_id: 0 + den: 1.0 + sie: 1.e-10 + + velocity: cartesian + u: 0.0 + v: 0.0 + w: 0.0 + + velocity: cartesian + u: 0.0 + v: 0.0 + w: 0.0 diff --git a/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sedov_sgh_opt.yaml b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sedov_sgh_opt.yaml index 000c0b724..6ffd94127 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sedov_sgh_opt.yaml +++ b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sedov_sgh_opt.yaml @@ -105,14 +105,17 @@ regions: optimization_options: optimization_process: topology_optimization optimization_objective: minimize_kinetic_energy - objective_regions: - - type: box - x1: 0.4 - x2: 0.8 - y1: 0.4 - y2: 0.8 - z1: 0.4 - z2: 0.8 + use_solve_checkpoints: true + num_solve_checkpoints : 10 + disable_forward_solve_output: true + optimization_output_freq: 1 + rol_params: + subproblem_algorithm: line_search + initial_constraint_penalty: 1.e-2 + step_tolerance: 1.e-3 + gradient_tolerance: 1.e-3 + constraint_tolerance: 1.e-3 + iteration_limit: 20 density_epsilon: 0.1 variable_outer_shell: true constraints: diff --git a/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_region.yaml b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_region.yaml index c6c56ce20..1f457dfdb 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_region.yaml +++ b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_region.yaml @@ -106,6 +106,9 @@ optimization_options: optimization_process: topology_optimization optimization_objective: minimize_kinetic_energy use_solve_checkpoints: true + num_solve_checkpoints : 10 + disable_forward_solve_output: true + optimization_output_freq: 1 rol_params: subproblem_algorithm: line_search initial_constraint_penalty: 1.e-2 diff --git a/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_restart.yaml b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_restart.yaml index 7d139f46d..9f5b45796 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_restart.yaml +++ b/src/Parallel-Solvers/Parallel-Explicit/Example_Yaml_Scripts/example_sgh_opt_restart.yaml @@ -108,10 +108,10 @@ regions: optimization_options: optimization_process: topology_optimization optimization_objective: minimize_kinetic_energy - use_gradient_tally: true use_solve_checkpoints: true + num_solve_checkpoints : 10 disable_forward_solve_output: true - optimization_output_freq: 20 + optimization_output_freq: 1 optimization_parameters_xml_file: false xml_parameters_file_name: "optimization_parameters.xml" rol_params: diff --git a/src/Parallel-Solvers/Parallel-Explicit/Explicit_Solver.cpp b/src/Parallel-Solvers/Parallel-Explicit/Explicit_Solver.cpp index 91c20bb05..65cdb7d2a 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/Explicit_Solver.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/Explicit_Solver.cpp @@ -194,8 +194,9 @@ void Explicit_Solver::run() { init_maps(); //print element imbalance stats - int rnum_global_sum = 0; - MPI_Allreduce(&rnum_elem, &rnum_global_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + long long int rnum_global_sum = 0; + long long int temp_rnum_elem = rnum_elem; + MPI_Allreduce(&temp_rnum_elem, &rnum_global_sum, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); double local_imbalance, max_imbalance, avg_elem; max_imbalance = 0; avg_elem = rnum_global_sum/((double) nranks); @@ -359,11 +360,11 @@ void Explicit_Solver::run() { } */ - std::cout << " RUNTIME OF CODE ON TASK " << myrank << " is "<< current_cpu-initial_CPU_time << " comms time " + *fos << " Runtime of code is "<< current_cpu-initial_CPU_time << " comms time " << communication_time << " host to dev time " << host2dev_time << " dev to host time " << dev2host_time << std::endl; if(simparam.output_options.timer_output_level == TIMER_VERBOSITY::thorough){ - std::cout << " OUTPUT TIME OF CODE ON TASK " << myrank << " is "<< output_time << std::endl; + *fos << " Output time of code is "<< output_time << std::endl; } //parallel_vtk_writer(); diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/include/FEA_Module_SGH.h b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/include/FEA_Module_SGH.h index fb294c7f6..4d0f4ffac 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/include/FEA_Module_SGH.h +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/include/FEA_Module_SGH.h @@ -678,6 +678,7 @@ class FEA_Module_SGH : public FEA_Module // Dual Views of the corner struct variables DViewCArrayKokkos corner_force; + DCArrayKokkos corner_external_force; DViewCArrayKokkos corner_mass; // Boundary Conditions Data diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/FEA_Module_SGH.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/FEA_Module_SGH.cpp index 0d4ee757e..2036ce050 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/FEA_Module_SGH.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/FEA_Module_SGH.cpp @@ -600,10 +600,6 @@ void FEA_Module_SGH::comm_variables(Teuchos::RCP zp) node_densities_distributed->describe(*fos, Teuchos::VERB_EXTREME); *fos << std::endl; std::fflush(stdout); - - // communicate design densities - // create import object using local node indices map and all indices map - Tpetra::Import importer(map, all_node_map); #endif // comms to get ghosts all_node_densities_distributed->doImport(*test_node_densities_distributed, *importer, Tpetra::INSERT); @@ -921,6 +917,8 @@ void FEA_Module_SGH::sgh_solve() tiny = dynamic_options.tiny; small = dynamic_options.small; + double cached_pregraphics_dt = fuzz; + size_t num_bdy_nodes = mesh->num_bdy_nodes; size_t cycle; real_t objective_accumulation, global_objective_accumulation; @@ -1170,6 +1168,10 @@ void FEA_Module_SGH::sgh_solve() // loop over the max number of time integration cycles for (cycle = 0; cycle < cycle_stop; cycle++) { + + //save timestep from before graphics output contraction + cached_pregraphics_dt = dt; + // get the step if (num_dim == 2) { get_timestep2D(*mesh, @@ -1278,6 +1280,21 @@ void FEA_Module_SGH::sgh_solve() cycle); } + if (have_loading_conditions) { + applied_forces(material, + *mesh, + node_coords, + node_vel, + node_mass, + elem_den, + elem_vol, + elem_div, + elem_mat_id, + corner_force, + rk_alpha, + cycle); + } + #ifdef DEBUG if (myrank == 1) { std::cout << "rk_alpha = " << rk_alpha << ", dt = " << dt << std::endl; @@ -1317,20 +1334,6 @@ void FEA_Module_SGH::sgh_solve() node_mass, corner_force); - if (have_loading_conditions) { - applied_forces(material, - *mesh, - node_coords, - node_vel, - node_mass, - elem_den, - elem_vol, - elem_div, - elem_mat_id, - corner_force, - rk_alpha, - cycle); - } // ---- apply force boundary conditions to the boundary patches---- boundary_velocity(*mesh, boundary, node_vel); @@ -1825,6 +1828,7 @@ void FEA_Module_SGH::sgh_solve() Explicit_Solver_Pointer_->output_time += comm_time2 - comm_time1; graphics_time = time_value + graphics_dt_ival; + dt = cached_pregraphics_dt; } // end if // end of calculation diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/energy_sgh.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/energy_sgh.cpp index 936aa4c8e..f5b925746 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/energy_sgh.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/energy_sgh.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -34,6 +34,8 @@ #include "mesh.h" #include "state.h" #include "FEA_Module_SGH.h" +#include "Simulation_Parameters/Simulation_Parameters_Explicit.h" +#include "Simulation_Parameters/FEA_Module/SGH_Parameters.h" ///////////////////////////////////////////////////////////////////////////// /// diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_gradients_sgh.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_gradients_sgh.cpp index 567789239..cd32db21e 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_gradients_sgh.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_gradients_sgh.cpp @@ -361,17 +361,14 @@ void FEA_Module_SGH::get_force_vgradient_sgh(const DCArrayKokkos& ma } else{ // Using a full tensoral Riemann jump relation - mu_term = muc(node_lid) - * sqrt(area_normal(node_lid, 0) * area_normal(node_lid, 0) - + area_normal(node_lid, 1) * area_normal(node_lid, 1) - + area_normal(node_lid, 2) * area_normal(node_lid, 2) ); + real_t area_normal_norm = sqrt(area_normal(node_lid, 0) * area_normal(node_lid, 0) + +area_normal(node_lid, 1) * area_normal(node_lid, 1) + +area_normal(node_lid, 2) * area_normal(node_lid, 2)); + mu_term = muc(node_lid)*area_normal_norm; for (int igradient = 0; igradient < num_nodes_in_elem; igradient++) { for (int jdim = 0; jdim < num_dims; jdim++) { - muc_gradient(node_lid, igradient, jdim) = muc_gradient(node_lid, igradient, jdim) - * sqrt(area_normal(node_lid, 0) * area_normal(node_lid, 0) - + area_normal(node_lid, 1) * area_normal(node_lid, 1) - + area_normal(node_lid, 2) * area_normal(node_lid, 2) ); + muc_gradient(node_lid, igradient, jdim) = muc_gradient(node_lid, igradient, jdim)*area_normal_norm; } } } @@ -509,26 +506,32 @@ void FEA_Module_SGH::get_force_vgradient_sgh(const DCArrayKokkos& ma // loop over dimension for (int dim = 0; dim < num_dims; dim++) { + const double current_node_vel = node_vel(rk_level, node_gid, dim); // assign gradient of corner contribution of force to relevant matrix entries with non-zero node velocity gradient for (int igradient = 0; igradient < num_nodes_in_elem; igradient++) { size_t gradient_node_gid = nodes_in_elem(elem_gid, igradient); column_index = num_dims * Global_Gradient_Matrix_Assembly_Map(elem_gid, igradient, node_lid); - for (int jdim = 0; jdim < num_dims; jdim++) { - if (node_lid == igradient && jdim == dim) { - if (map->isNodeLocalElement(gradient_node_gid)) { + + if (map->isNodeLocalElement(gradient_node_gid)) { + for (int jdim = 0; jdim < num_dims; jdim++) { + if (node_lid == igradient && jdim == dim) { Force_Gradient_Velocities(gradient_node_gid * num_dims + jdim, column_index + dim) += phi * (muc(node_lid) * (vel_star_gradient(dim, igradient, jdim) - 1) + - muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - node_vel(rk_level, node_gid, dim))); + muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - current_node_vel)); } + else { + Force_Gradient_Velocities(gradient_node_gid * num_dims + jdim, column_index + dim) += phi * (muc(node_lid) * (vel_star_gradient(dim, igradient, jdim)) + + muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - current_node_vel)); + } + } + } + for (int jdim = 0; jdim < num_dims; jdim++) { + if (node_lid == igradient && jdim == dim) { corner_gradient_storage(corner_gid, dim, igradient, jdim) = phi * (muc(node_lid) * (vel_star_gradient(dim, igradient, jdim) - 1) + - muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - node_vel(rk_level, node_gid, dim))); + muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - current_node_vel)); } else { - if (map->isNodeLocalElement(gradient_node_gid)) { - Force_Gradient_Velocities(gradient_node_gid * num_dims + jdim, column_index + dim) += phi * (muc(node_lid) * (vel_star_gradient(dim, igradient, jdim)) + - muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - node_vel(rk_level, node_gid, dim))); - } corner_gradient_storage(corner_gid, dim, igradient, jdim) = phi * (muc(node_lid) * (vel_star_gradient(dim, igradient, jdim)) + - muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - node_vel(rk_level, node_gid, dim))); + muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - current_node_vel)); } } } @@ -1093,8 +1096,6 @@ void FEA_Module_SGH::get_force_ugradient_sgh(const DCArrayKokkos& ma ViewCArrayKokkos vel_star_gradient(vel_star_gradient_array, num_dims, num_nodes_in_elem, num_dims); ViewCArrayKokkos vel_grad(vel_grad_array, num_dims, num_dims); - // --- abviatations of variables --- - // element volume double vol = elem_vol(elem_gid); @@ -1326,22 +1327,20 @@ void FEA_Module_SGH::get_force_ugradient_sgh(const DCArrayKokkos& ma // Using a full tensoral Riemann jump relation mu_term = muc(node_lid) * sqrt(area_normal(node_lid, 0) * area_normal(node_lid, 0) - + area_normal(node_lid, 1) * area_normal(node_lid, 1) - + area_normal(node_lid, 2) * area_normal(node_lid, 2) ); + + area_normal(node_lid, 1) * area_normal(node_lid, 1) + + area_normal(node_lid, 2) * area_normal(node_lid, 2) ); + real_t area_normal_norm = sqrt(area_normal(node_lid, 0) * area_normal(node_lid, 0) + +area_normal(node_lid, 1) * area_normal(node_lid, 1) + +area_normal(node_lid, 2) * area_normal(node_lid, 2)); for (int igradient = 0; igradient < num_nodes_in_elem; igradient++) { for (int jdim = 0; jdim < num_dims; jdim++) { mu_term_gradient = muc_gradient(node_lid, igradient, jdim) - * sqrt(area_normal(node_lid, 0) * area_normal(node_lid, 0) - + area_normal(node_lid, 1) * area_normal(node_lid, 1) - + area_normal(node_lid, 2) * area_normal(node_lid, 2) ); + * area_normal_norm; mu_term_gradient += muc(node_lid) * (area_normal(node_lid, 0) * area_normal_gradients(node_lid, 0, igradient, jdim) + area_normal(node_lid, 1) * area_normal_gradients(node_lid, 1, igradient, jdim) - + area_normal(node_lid, 2) * area_normal_gradients(node_lid, 2, igradient, jdim)) / - sqrt(area_normal(node_lid, 0) * area_normal(node_lid, 0) - + area_normal(node_lid, 1) * area_normal(node_lid, 1) - + area_normal(node_lid, 2) * area_normal(node_lid, 2) ); - + + area_normal(node_lid, 2) * area_normal_gradients(node_lid, 2, igradient, jdim))/ area_normal_norm; + muc_gradient(node_lid, igradient, jdim) = mu_term_gradient; } } @@ -1470,40 +1469,34 @@ void FEA_Module_SGH::get_force_ugradient_sgh(const DCArrayKokkos& ma // loop over dimension for (int dim = 0; dim < num_dims; dim++) { + const double current_node_vel = node_vel(rk_level, node_gid, dim); // assign gradient of corner contribution of force to relevant matrix entries with non-zero node velocity gradient for (int igradient = 0; igradient < num_nodes_in_elem; igradient++) { - for (int jdim = 0; jdim < num_dims; jdim++) { - size_t gradient_node_gid = nodes_in_elem(elem_gid, igradient); - // if(!map->isNodeLocalElement(gradient_node_gid)) continue; - column_index = num_dims * Global_Gradient_Matrix_Assembly_Map(elem_gid, igradient, node_lid); - if (map->isNodeLocalElement(gradient_node_gid)) { + size_t gradient_node_gid = nodes_in_elem(elem_gid, igradient); + // if(!map->isNodeLocalElement(gradient_node_gid)) continue; + column_index = num_dims * Global_Gradient_Matrix_Assembly_Map(elem_gid, igradient, node_lid); + + if (map->isNodeLocalElement(gradient_node_gid)) { + for (int jdim = 0; jdim < num_dims; jdim++) { Force_Gradient_Positions(gradient_node_gid * num_dims + jdim, column_index + dim) += area_normal(node_lid, 0) * tau_gradient(0, dim, igradient, jdim) + area_normal(node_lid, 1) * tau_gradient(1, dim, igradient, jdim) + area_normal(node_lid, 2) * tau_gradient(2, dim, igradient, jdim) + area_normal_gradients(node_lid, 0, igradient, jdim) * tau(0, dim) + area_normal_gradients(node_lid, 1, igradient, jdim) * tau(1, dim) + area_normal_gradients(node_lid, 2, igradient, jdim) * tau(2, dim) - + phi * muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - node_vel(rk_level, node_gid, dim)) + + phi * muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - current_node_vel) + phi * muc(node_lid) * (vel_star_gradient(dim, igradient, jdim)); } + } + for (int jdim = 0; jdim < num_dims; jdim++) { corner_gradient_storage(corner_gid, dim, igradient, jdim) = area_normal(node_lid, 0) * tau_gradient(0, dim, igradient, jdim) + area_normal(node_lid, 1) * tau_gradient(1, dim, igradient, jdim) + area_normal(node_lid, 2) * tau_gradient(2, dim, igradient, jdim) + area_normal_gradients(node_lid, 0, igradient, jdim) * tau(0, dim) + area_normal_gradients(node_lid, 1, igradient, jdim) * tau(1, dim) + area_normal_gradients(node_lid, 2, igradient, jdim) * tau(2, dim) - + phi * muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - node_vel(rk_level, node_gid, dim)) + + phi * muc_gradient(node_lid, igradient, jdim) * (vel_star(dim) - current_node_vel) + phi * muc(node_lid) * (vel_star_gradient(dim, igradient, jdim)); - // if(map->isNodeLocalElement(gradient_node_gid)){ - // Force_Gradient_Positions(gradient_node_gid*num_dims+jdim, column_index+dim) += - // + area_normal_gradients(node_lid, 0, igradient, jdim)*tau(0, dim) - // + area_normal_gradients(node_lid, 1, igradient, jdim)*tau(1, dim) - // + area_normal_gradients(node_lid, 2, igradient, jdim)*tau(2, dim); - // } - // corner_gradient_storage(corner_gid,dim,igradient,jdim) = - // + area_normal_gradients(node_lid, 0, igradient, jdim)*tau(0, dim) - // + area_normal_gradients(node_lid, 1, igradient, jdim)*tau(1, dim) - // + area_normal_gradients(node_lid, 2, igradient, jdim)*tau(2, dim); } } } // end loop over dimension diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_sgh.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_sgh.cpp index c3dee5cf9..fca75f951 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_sgh.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/force_sgh.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -899,6 +899,7 @@ void FEA_Module_SGH::applied_forces(const DCArrayKokkos& material, const size_t num_dim = mesh.num_dims; const_vec_array all_initial_node_coords = all_initial_node_coords_distributed->getLocalView(Tpetra::Access::ReadOnly); const size_t num_lcs = module_params->loading.size(); + const size_t num_corners = mesh.num_corners; const DCArrayKokkos mat_fill = simparam->mat_fill; const DCArrayKokkos loading = module_params->loading; @@ -906,45 +907,50 @@ void FEA_Module_SGH::applied_forces(const DCArrayKokkos& material, // debug check // std::cout << "NUMBER OF LOADING CONDITIONS: " << num_lcs << std::endl; - // walk over the nodes to update the velocity - FOR_ALL_CLASS(node_gid, 0, nlocal_nodes, { - double current_node_coords[3]; - size_t dof_id; - double node_force[3]; - double applied_force[3]; - double radius; + //initialize + FOR_ALL_CLASS(corner_id, 0, num_corners, { for (size_t dim = 0; dim < num_dim; dim++) { - node_force[dim] = 0.0; - current_node_coords[dim] = all_initial_node_coords(node_gid, dim); - } // end for dim - radius = sqrt(current_node_coords[0] * current_node_coords[0] + current_node_coords[1] * current_node_coords[1] + current_node_coords[2] * current_node_coords[2]); - for (size_t ilc = 0; ilc < num_lcs; ilc++) { - // debug check - // std::cout << "LOADING CONDITION VOLUME TYPE: " << to_string(loading(ilc).volume) << std::endl; - - bool fill_this = loading(ilc).volume.contains(current_node_coords); - if (fill_this) { - // loop over all corners around the node and calculate the nodal force - for (size_t corner_lid = 0; corner_lid < num_corners_in_node(node_gid); corner_lid++) { - // Get corner gid - size_t corner_gid = corners_in_node(node_gid, corner_lid); - applied_force[0] = loading(ilc).x; - applied_force[1] = loading(ilc).y; - applied_force[2] = loading(ilc).z; + corner_external_force(corner_id,dim) = 0; + } + }); // end parallel for + Kokkos::fence(); + + for (size_t ilc = 0; ilc < num_lcs; ilc++) { + + const Volume current_volume = loading(ilc).volume; + const double applied_force[] = {loading(ilc).x, loading(ilc).y, loading(ilc).z}; + + FOR_ALL_CLASS(elem_id, 0, rnum_elem, { + double current_node_coords[3]; + size_t dof_id; + double node_force[3]; + double radius; + size_t node_id; + size_t corner_id; + // std::cout << elem_mass(elem_id) <& elem_ double twelth = 1. / 12.; // element volume gradient - for (int inode = 0; inode < 8; inode++) { - for (int idim = 0; idim < num_dims; idim++) { - switch (num_dims * inode + idim) { - case 0: - gradient_result = - ((y(2) * (z(1) - z(3)) + y(7) * (z(3) - z(4)) + y(5) * (-z(1) + z(4)) + y(1) * (-z(2) - z(3) + z(4) + z(5)) + y(3) * (z(1) + z(2) - z(4) - z(7)) + y(4) * - (-z(1) + z(3) - z(5) + z(7)))) * twelth; - break; - case 3: - gradient_result = - ((y(3) * (-z(0) + z(2)) + y(4) * (z(0) - z(5)) + y(0) * (z(2) + z(3) - z(4) - z(5)) + y(6) * (-z(2) + z(5)) + y(5) * (z(0) - z(2) + z(4) - z(6)) + y(2) * - (-z(0) - z(3) + z(5) + z(6)))) * twelth; - break; - - case 6: - gradient_result = - ((y(0) * (-z(1) + z(3)) + y(5) * (z(1) - z(6)) + y(1) * (z(0) + z(3) - z(5) - z(6)) + y(7) * (-z(3) + z(6)) + y(6) * (z(1) - z(3) + z(5) - z(7)) + y(3) * - (-z(0) - z(1) + z(6) + z(7)))) * twelth; - break; - case 9: - gradient_result = - ((y(1) * (z(0) - z(2)) + y(7) * (-z(0) + z(2) - z(4) + z(6)) + y(6) * (z(2) - z(7)) + y(2) * (z(0) + z(1) - z(6) - z(7)) + y(4) * (-z(0) + z(7)) + y(0) * - (-z(1) - z(2) + z(4) + z(7)))) * twelth; - break; - case 12: - gradient_result = - ((y(1) * (-z(0) + z(5)) + y(7) * (z(0) + z(3) - z(5) - z(6)) + y(3) * (z(0) - z(7)) + y(0) * (z(1) - z(3) + z(5) - z(7)) + y(6) * (-z(5) + z(7)) + y(5) * - (-z(0) - z(1) + z(6) + z(7)))) * twelth; - break; - case 15: - gradient_result = - ((y(0) * (z(1) - z(4)) + y(7) * (z(4) - z(6)) + y(2) * (-z(1) + z(6)) + y(1) * (-z(0) + z(2) - z(4) + z(6)) + y(4) * (z(0) + z(1) - z(6) - z(7)) + y(6) * - (-z(1) - z(2) + z(4) + z(7)))) * twelth; - break; - case 18: - gradient_result = - ((y(1) * (z(2) - z(5)) + y(7) * (-z(2) - z(3) + z(4) + z(5)) + y(5) * (z(1) + z(2) - z(4) - z(7)) + y(4) * (z(5) - z(7)) + y(3) * (-z(2) + z(7)) + y(2) * - (-z(1) + z(3) - z(5) + z(7)))) * twelth; - break; - case 21: - gradient_result = - ((y(0) * (-z(3) + z(4)) + y(6) * (z(2) + z(3) - z(4) - z(5)) + y(2) * (z(3) - z(6)) + y(3) * (z(0) - z(2) + z(4) - z(6)) + y(5) * (-z(4) + z(6)) + y(4) * - (-z(0) - z(3) + z(5) + z(6)))) * twelth; - break; - case 1: - gradient_result = - (x(1) * ((z(2) + z(3) - z(4) - z(5))) + - x(7) * ((-z(3) + z(4))) + - x(3) * ((-z(1) - z(2) + z(4) + z(7))) + - x(5) * ((z(1) - z(4))) + - x(2) * ((-z(1) + z(3))) + - x(4) * ((z(1) - z(3) + z(5) - z(7)))) * twelth; - break; - case 4: - gradient_result = - (x(3) * ((z(0) - z(2))) + - x(5) * ((-z(0) + z(2) - z(4) + z(6))) + - x(6) * ((z(2) - z(5))) + - x(0) * ((-z(2) - z(3) + z(4) + z(5))) + - x(2) * ((z(0) + z(3) - z(5) - z(6))) + - x(4) * ((-z(0) + z(5)))) * twelth; - break; - case 7: - gradient_result = - (x(1) * ((-z(0) - z(3) + z(5) + z(6))) + - x(7) * ((z(3) - z(6))) + - x(3) * ((z(0) + z(1) - z(6) - z(7))) + - x(5) * ((-z(1) + z(6))) + - x(6) * ((-z(1) + z(3) - z(5) + z(7))) + - x(0) * ((z(1) - z(3)))) * twelth; - break; - case 10: - gradient_result = - (x(1) * ((-z(0) + z(2))) + - x(7) * ((z(0) - z(2) + z(4) - z(6))) + - x(6) * ((-z(2) + z(7))) + - x(0) * ((z(1) + z(2) - z(4) - z(7))) + - x(2) * ((-z(0) - z(1) + z(6) + z(7))) + - x(4) * ((z(0) - z(7)))) * twelth; - break; - case 13: - gradient_result = - (x(1) * ((z(0) - z(5))) + - x(7) * ((-z(0) - z(3) + z(5) + z(6))) + - x(3) * ((-z(0) + z(7))) + - x(5) * ((z(0) + z(1) - z(6) - z(7))) + - x(6) * ((z(5) - z(7))) + - x(0) * ((-z(1) + z(3) - z(5) + z(7)))) * twelth; - break; - case 16: - gradient_result = - (x(1) * ((z(0) - z(2) + z(4) - z(6))) + - x(7) * ((-z(4) + z(6))) + - x(6) * ((z(1) + z(2) - z(4) - z(7))) + - x(0) * ((-z(1) + z(4))) + - x(2) * ((z(1) - z(6))) + - x(4) * ((-z(0) - z(1) + z(6) + z(7)))) * twelth; - break; - case 19: - gradient_result = - (x(1) * ((-z(2) + z(5))) + - x(7) * ((z(2) + z(3) - z(4) - z(5))) + - x(3) * ((z(2) - z(7))) + - x(5) * ((-z(1) - z(2) + z(4) + z(7))) + - x(2) * ((z(1) - z(3) + z(5) - z(7))) + - x(4) * ((-z(5) + z(7)))) * twelth; - break; - case 22: - gradient_result = - (x(3) * ((-z(0) + z(2) - z(4) + z(6))) + - x(5) * ((z(4) - z(6))) + - x(6) * ((-z(2) - z(3) + z(4) + z(5))) + - x(0) * ((z(3) - z(4))) + - x(2) * ((-z(3) + z(6))) + - x(4) * ((z(0) + z(3) - z(5) - z(6)))) * twelth; - break; - case 2: - gradient_result = - (x(1) * (-y(3) + y(4) + y(5) - y(2)) + - x(7) * (y(3) - y(4)) + - x(3) * (y(1) - y(7) + y(2) - y(4)) + - x(5) * (-y(1) + y(4)) + - x(2) * (y(1) - y(3)) + - x(4) * (-y(1) + y(7) + y(3) - y(5))) * twelth; - break; - case 5: - gradient_result = - (x(3) * (y(2) - y(0)) + - x(5) * (y(0) - y(2) + y(4) - y(6)) + - x(6) * (y(5) - y(2)) + - x(0) * (y(2) - y(5) + y(3) - y(4)) + - x(2) * (-y(0) + y(5) + y(6) - y(3)) + - x(4) * (y(0) - y(5))) * twelth; - break; - case 8: - gradient_result = - (x(1) * (y(3) + y(0) - y(6) - y(5)) + - x(7) * (y(6) - y(3)) + - x(3) * (-y(1) + y(7) + y(6) - y(0)) + - x(5) * (y(1) - y(6)) + - x(6) * (y(1) - y(7) + y(5) - y(3)) + - x(0) * (-y(1) + y(3))) * twelth; - break; - case 11: - gradient_result = - (x(1) * (y(0) - y(2)) + - x(7) * (-y(0) + y(6) + y(2) - y(4)) + - x(6) * (-y(7) + y(2)) + - x(0) * (-y(2) + y(7) - y(1) + y(4)) + - x(2) * (y(0) + y(1) - y(7) - y(6)) + - x(4) * (y(7) - y(0))) * twelth; - break; - case 14: - gradient_result = - (x(1) * (-y(0) + y(5)) + - x(7) * (y(0) - y(6) + y(3) - y(5)) + - x(3) * (-y(7) + y(0)) + - x(5) * (-y(0) + y(7) - y(1) + y(6)) + - x(6) * (y(7) - y(5)) + - x(0) * (-y(7) + y(5) + y(1) - y(3))) * twelth; - break; - case 17: - gradient_result = - (x(1) * (-y(4) - y(0) + y(6) + y(2)) + - x(7) * (-y(6) + y(4)) + - x(6) * (-y(1) + y(7) + y(4) - y(2)) + - x(0) * (y(1) - y(4)) + - x(2) * (-y(1) + y(6)) + - x(4) * (y(1) - y(7) + y(0) - y(6))) * twelth; - break; - case 20: - gradient_result = - (x(1) * (-y(5) + y(2)) + - x(7) * (-y(2) - y(3) + y(5) + y(4)) + - x(3) * (y(7) - y(2)) + - x(5) * (-y(7) + y(2) + y(1) - y(4)) + - x(2) * (-y(5) - y(1) + y(7) + y(3)) + - x(4) * (-y(7) + y(5))) * twelth; - break; - case 23: - gradient_result = - (x(3) * (-y(6) - y(2) + y(4) + y(0)) + - x(5) * (-y(4) + y(6)) + - x(6) * (-y(5) - y(4) + y(3) + y(2)) + - x(0) * (-y(3) + y(4)) + - x(2) * (-y(6) + y(3)) + - x(4) * (-y(3) - y(0) + y(6) + y(5))) * twelth; - break; - } - elem_vol_gradients(inode, idim) = gradient_result; - } - } - return; + //x gradients + elem_vol_gradients(0, 0) = + ((y(2) * (z(1) - z(3)) + y(7) * (z(3) - z(4)) + y(5) * (-z(1) + z(4)) + y(1) * (-z(2) - z(3) + z(4) + z(5)) + y(3) * (z(1) + z(2) - z(4) - z(7)) + y(4) * + (-z(1) + z(3) - z(5) + z(7)))) * twelth; + elem_vol_gradients(1, 0) = + ((y(3) * (-z(0) + z(2)) + y(4) * (z(0) - z(5)) + y(0) * (z(2) + z(3) - z(4) - z(5)) + y(6) * (-z(2) + z(5)) + y(5) * (z(0) - z(2) + z(4) - z(6)) + y(2) * + (-z(0) - z(3) + z(5) + z(6)))) * twelth; + elem_vol_gradients(2, 0) = + ((y(0) * (-z(1) + z(3)) + y(5) * (z(1) - z(6)) + y(1) * (z(0) + z(3) - z(5) - z(6)) + y(7) * (-z(3) + z(6)) + y(6) * (z(1) - z(3) + z(5) - z(7)) + y(3) * + (-z(0) - z(1) + z(6) + z(7)))) * twelth; + elem_vol_gradients(3, 0) = + ((y(1) * (z(0) - z(2)) + y(7) * (-z(0) + z(2) - z(4) + z(6)) + y(6) * (z(2) - z(7)) + y(2) * (z(0) + z(1) - z(6) - z(7)) + y(4) * (-z(0) + z(7)) + y(0) * + (-z(1) - z(2) + z(4) + z(7)))) * twelth; + elem_vol_gradients(4, 0) = + ((y(1) * (-z(0) + z(5)) + y(7) * (z(0) + z(3) - z(5) - z(6)) + y(3) * (z(0) - z(7)) + y(0) * (z(1) - z(3) + z(5) - z(7)) + y(6) * (-z(5) + z(7)) + y(5) * + (-z(0) - z(1) + z(6) + z(7)))) * twelth; + elem_vol_gradients(5, 0) = + ((y(0) * (z(1) - z(4)) + y(7) * (z(4) - z(6)) + y(2) * (-z(1) + z(6)) + y(1) * (-z(0) + z(2) - z(4) + z(6)) + y(4) * (z(0) + z(1) - z(6) - z(7)) + y(6) * + (-z(1) - z(2) + z(4) + z(7)))) * twelth; + elem_vol_gradients(6, 0) = + ((y(1) * (z(2) - z(5)) + y(7) * (-z(2) - z(3) + z(4) + z(5)) + y(5) * (z(1) + z(2) - z(4) - z(7)) + y(4) * (z(5) - z(7)) + y(3) * (-z(2) + z(7)) + y(2) * + (-z(1) + z(3) - z(5) + z(7)))) * twelth; + elem_vol_gradients(7, 0) = + ((y(0) * (-z(3) + z(4)) + y(6) * (z(2) + z(3) - z(4) - z(5)) + y(2) * (z(3) - z(6)) + y(3) * (z(0) - z(2) + z(4) - z(6)) + y(5) * (-z(4) + z(6)) + y(4) * + (-z(0) - z(3) + z(5) + z(6)))) * twelth; + + //y gradients + elem_vol_gradients(0, 1) = + (x(1) * ((z(2) + z(3) - z(4) - z(5))) + + x(7) * ((-z(3) + z(4))) + + x(3) * ((-z(1) - z(2) + z(4) + z(7))) + + x(5) * ((z(1) - z(4))) + + x(2) * ((-z(1) + z(3))) + + x(4) * ((z(1) - z(3) + z(5) - z(7)))) * twelth; + elem_vol_gradients(1, 1) = + (x(3) * ((z(0) - z(2))) + + x(5) * ((-z(0) + z(2) - z(4) + z(6))) + + x(6) * ((z(2) - z(5))) + + x(0) * ((-z(2) - z(3) + z(4) + z(5))) + + x(2) * ((z(0) + z(3) - z(5) - z(6))) + + x(4) * ((-z(0) + z(5)))) * twelth; + elem_vol_gradients(2, 1) = + (x(1) * ((-z(0) - z(3) + z(5) + z(6))) + + x(7) * ((z(3) - z(6))) + + x(3) * ((z(0) + z(1) - z(6) - z(7))) + + x(5) * ((-z(1) + z(6))) + + x(6) * ((-z(1) + z(3) - z(5) + z(7))) + + x(0) * ((z(1) - z(3)))) * twelth; + elem_vol_gradients(3, 1) = + (x(1) * ((-z(0) + z(2))) + + x(7) * ((z(0) - z(2) + z(4) - z(6))) + + x(6) * ((-z(2) + z(7))) + + x(0) * ((z(1) + z(2) - z(4) - z(7))) + + x(2) * ((-z(0) - z(1) + z(6) + z(7))) + + x(4) * ((z(0) - z(7)))) * twelth; + elem_vol_gradients(4, 1) = + (x(1) * ((z(0) - z(5))) + + x(7) * ((-z(0) - z(3) + z(5) + z(6))) + + x(3) * ((-z(0) + z(7))) + + x(5) * ((z(0) + z(1) - z(6) - z(7))) + + x(6) * ((z(5) - z(7))) + + x(0) * ((-z(1) + z(3) - z(5) + z(7)))) * twelth; + elem_vol_gradients(5, 1) = + (x(1) * ((z(0) - z(2) + z(4) - z(6))) + + x(7) * ((-z(4) + z(6))) + + x(6) * ((z(1) + z(2) - z(4) - z(7))) + + x(0) * ((-z(1) + z(4))) + + x(2) * ((z(1) - z(6))) + + x(4) * ((-z(0) - z(1) + z(6) + z(7)))) * twelth; + elem_vol_gradients(6, 1) = + (x(1) * ((-z(2) + z(5))) + + x(7) * ((z(2) + z(3) - z(4) - z(5))) + + x(3) * ((z(2) - z(7))) + + x(5) * ((-z(1) - z(2) + z(4) + z(7))) + + x(2) * ((z(1) - z(3) + z(5) - z(7))) + + x(4) * ((-z(5) + z(7)))) * twelth; + elem_vol_gradients(7, 1) = + (x(3) * ((-z(0) + z(2) - z(4) + z(6))) + + x(5) * ((z(4) - z(6))) + + x(6) * ((-z(2) - z(3) + z(4) + z(5))) + + x(0) * ((z(3) - z(4))) + + x(2) * ((-z(3) + z(6))) + + x(4) * ((z(0) + z(3) - z(5) - z(6)))) * twelth; + + //z gradients + elem_vol_gradients(0, 2) = + (x(1) * (-y(3) + y(4) + y(5) - y(2)) + + x(7) * (y(3) - y(4)) + + x(3) * (y(1) - y(7) + y(2) - y(4)) + + x(5) * (-y(1) + y(4)) + + x(2) * (y(1) - y(3)) + + x(4) * (-y(1) + y(7) + y(3) - y(5))) * twelth; + elem_vol_gradients(1, 2) = + (x(3) * (y(2) - y(0)) + + x(5) * (y(0) - y(2) + y(4) - y(6)) + + x(6) * (y(5) - y(2)) + + x(0) * (y(2) - y(5) + y(3) - y(4)) + + x(2) * (-y(0) + y(5) + y(6) - y(3)) + + x(4) * (y(0) - y(5))) * twelth; + elem_vol_gradients(2, 2) = + (x(1) * (y(3) + y(0) - y(6) - y(5)) + + x(7) * (y(6) - y(3)) + + x(3) * (-y(1) + y(7) + y(6) - y(0)) + + x(5) * (y(1) - y(6)) + + x(6) * (y(1) - y(7) + y(5) - y(3)) + + x(0) * (-y(1) + y(3))) * twelth; + elem_vol_gradients(3, 2) = + (x(1) * (y(0) - y(2)) + + x(7) * (-y(0) + y(6) + y(2) - y(4)) + + x(6) * (-y(7) + y(2)) + + x(0) * (-y(2) + y(7) - y(1) + y(4)) + + x(2) * (y(0) + y(1) - y(7) - y(6)) + + x(4) * (y(7) - y(0))) * twelth; + elem_vol_gradients(4, 2) = + (x(1) * (-y(0) + y(5)) + + x(7) * (y(0) - y(6) + y(3) - y(5)) + + x(3) * (-y(7) + y(0)) + + x(5) * (-y(0) + y(7) - y(1) + y(6)) + + x(6) * (y(7) - y(5)) + + x(0) * (-y(7) + y(5) + y(1) - y(3))) * twelth; + elem_vol_gradients(5, 2) = + (x(1) * (-y(4) - y(0) + y(6) + y(2)) + + x(7) * (-y(6) + y(4)) + + x(6) * (-y(1) + y(7) + y(4) - y(2)) + + x(0) * (y(1) - y(4)) + + x(2) * (-y(1) + y(6)) + + x(4) * (y(1) - y(7) + y(0) - y(6))) * twelth; + elem_vol_gradients(6, 2) = + (x(1) * (-y(5) + y(2)) + + x(7) * (-y(2) - y(3) + y(5) + y(4)) + + x(3) * (y(7) - y(2)) + + x(5) * (-y(7) + y(2) + y(1) - y(4)) + + x(2) * (-y(5) - y(1) + y(7) + y(3)) + + x(4) * (-y(7) + y(5))) * twelth; + elem_vol_gradients(7, 2) = + (x(3) * (-y(6) - y(2) + y(4) + y(0)) + + x(5) * (-y(4) + y(6)) + + x(6) * (-y(5) - y(4) + y(3) + y(2)) + + x(0) * (-y(3) + y(4)) + + x(2) * (-y(6) + y(3)) + + x(4) * (-y(3) - y(0) + y(6) + y(5))) * twelth; } // end subroutine ///////////////////////////////////////////////////////////////////////////// diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/momentum.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/momentum.cpp index 9c03ecf85..93b1a1d43 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/momentum.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/momentum.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -33,6 +33,7 @@ **********************************************************************************************/ #include "state.h" #include "FEA_Module_SGH.h" +#include "Simulation_Parameters/FEA_Module/SGH_Parameters.h" ///////////////////////////////////////////////////////////////////////////// /// @@ -54,32 +55,59 @@ void FEA_Module_SGH::update_velocity_sgh(double rk_alpha, { const size_t rk_level = rk_num_bins - 1; const size_t num_dims = num_dim; + const size_t num_lcs = module_params->loading.size(); // walk over the nodes to update the velocity - FOR_ALL_CLASS(node_gid, 0, nlocal_nodes, { - double node_force[3]; - for (size_t dim = 0; dim < num_dims; dim++) { - node_force[dim] = 0.0; - } // end for dim - - // loop over all corners around the node and calculate the nodal force - for (size_t corner_lid = 0; corner_lid < num_corners_in_node(node_gid); corner_lid++) { - // Get corner gid - size_t corner_gid = corners_in_node(node_gid, corner_lid); - - // loop over dimension + if(num_lcs){ + FOR_ALL_CLASS(node_gid, 0, nlocal_nodes, { + double node_force[3]; for (size_t dim = 0; dim < num_dims; dim++) { - node_force[dim] += corner_force(corner_gid, dim); + node_force[dim] = 0.0; } // end for dim - } // end for corner_lid - // update the velocity - for (int dim = 0; dim < num_dims; dim++) { - node_vel(rk_level, node_gid, dim) = node_vel(0, node_gid, dim) + - rk_alpha * dt * node_force[dim] / node_mass(node_gid); - } // end for dim - }); // end for parallel for over nodes + // loop over all corners around the node and calculate the nodal force + for (size_t corner_lid = 0; corner_lid < num_corners_in_node(node_gid); corner_lid++) { + // Get corner gid + size_t corner_gid = corners_in_node(node_gid, corner_lid); + + // loop over dimension + for (size_t dim = 0; dim < num_dims; dim++) { + node_force[dim] += corner_force(corner_gid, dim) + corner_external_force(corner_gid, dim); + } // end for dim + } // end for corner_lid + + // update the velocity + for (int dim = 0; dim < num_dims; dim++) { + node_vel(rk_level, node_gid, dim) = node_vel(0, node_gid, dim) + + rk_alpha * dt * node_force[dim] / node_mass(node_gid); + } // end for dim + }); // end for parallel for over nodes + } + else{ + FOR_ALL_CLASS(node_gid, 0, nlocal_nodes, { + double node_force[3]; + for (size_t dim = 0; dim < num_dims; dim++) { + node_force[dim] = 0.0; + } // end for dim + // loop over all corners around the node and calculate the nodal force + for (size_t corner_lid = 0; corner_lid < num_corners_in_node(node_gid); corner_lid++) { + // Get corner gid + size_t corner_gid = corners_in_node(node_gid, corner_lid); + + // loop over dimension + for (size_t dim = 0; dim < num_dims; dim++) { + node_force[dim] += corner_force(corner_gid, dim); + } // end for dim + } // end for corner_lid + + // update the velocity + for (int dim = 0; dim < num_dims; dim++) { + node_vel(rk_level, node_gid, dim) = node_vel(0, node_gid, dim) + + rk_alpha * dt * node_force[dim] / node_mass(node_gid); + } // end for dim + }); // end for parallel for over nodes + } return; } // end subroutine update_velocity diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/power_gradients_sgh.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/power_gradients_sgh.cpp index f9068c2a8..1a2d76f9c 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/power_gradients_sgh.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/power_gradients_sgh.cpp @@ -372,15 +372,15 @@ void FEA_Module_SGH::get_power_ugradient_sgh(double rk_alpha, // calculate the Power=F dot V for this corner for (size_t dim = 0; dim < num_dims; dim++) { + const double current_node_vel = node_vel(rk_level, node_gid, dim); for (int igradient = 0; igradient < num_nodes_in_elem; igradient++) { - for (size_t jdim = 0; jdim < num_dims; jdim++) { - column_id = Element_Gradient_Matrix_Assembly_Map(elem_gid, igradient); - gradient_node_id = nodes_in_elem(elem_gid, igradient); - if (!map->isNodeLocalElement(gradient_node_id)) { - continue; + column_id = Element_Gradient_Matrix_Assembly_Map(elem_gid, igradient); + gradient_node_id = nodes_in_elem(elem_gid, igradient); + if (map->isNodeLocalElement(gradient_node_id)) { + for (size_t jdim = 0; jdim < num_dims; jdim++) { + Power_Gradient_Positions(gradient_node_id * num_dims + jdim, column_id) -= + corner_gradient_storage(corner_gid, dim, igradient, jdim) * current_node_vel * node_radius; } - Power_Gradient_Positions(gradient_node_id * num_dims + jdim, column_id) -= - corner_gradient_storage(corner_gid, dim, igradient, jdim) * node_vel(rk_level, node_gid, dim) * node_radius; } } } // end for dim @@ -446,21 +446,21 @@ void FEA_Module_SGH::get_power_vgradient_sgh(double rk_alpha, // calculate the Power=F dot V for this corner for (size_t dim = 0; dim < num_dims; dim++) { + const double current_node_vel = node_vel(rk_level, node_gid, dim); for (int igradient = 0; igradient < num_nodes_in_elem; igradient++) { column_id = Element_Gradient_Matrix_Assembly_Map(elem_gid, igradient); gradient_node_id = nodes_in_elem(elem_gid, igradient); - if (!map->isNodeLocalElement(gradient_node_id)) { - continue; - } - for (size_t jdim = 0; jdim < num_dims; jdim++) { - if (node_lid == igradient && jdim == dim) { - Power_Gradient_Velocities(gradient_node_id * num_dims + jdim, column_id) -= - corner_gradient_storage(corner_gid, dim, igradient, jdim) * node_vel(rk_level, node_gid, dim) - * node_radius + corner_force(corner_gid, dim) * node_radius; - } - else { - Power_Gradient_Velocities(gradient_node_id * num_dims + jdim, column_id) -= - corner_gradient_storage(corner_gid, dim, igradient, jdim) * node_vel(rk_level, node_gid, dim) * node_radius; + if (map->isNodeLocalElement(gradient_node_id)) { + for (size_t jdim = 0; jdim < num_dims; jdim++) { + if (node_lid == igradient && jdim == dim) { + Power_Gradient_Velocities(gradient_node_id * num_dims + jdim, column_id) -= + corner_gradient_storage(corner_gid, dim, igradient, jdim) * current_node_vel + * node_radius + corner_force(corner_gid, dim) * node_radius; + } + else { + Power_Gradient_Velocities(gradient_node_id * num_dims + jdim, column_id) -= + corner_gradient_storage(corner_gid, dim, igradient, jdim) * current_node_vel * node_radius; + } } } } diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/setup_sgh.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/setup_sgh.cpp index b45326d27..f86e6f1b2 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/setup_sgh.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/setup_sgh.cpp @@ -115,6 +115,11 @@ void FEA_Module_SGH::setup() corner_force = DViewCArrayKokkos(&corner_interface.force(0, 0), num_corners, num_dim); corner_mass = DViewCArrayKokkos(&corner_interface.mass(0), num_corners); + //external force storage + if(num_lcs){ + corner_external_force = DCArrayKokkos(num_corners, num_dim); + } + // allocate elem_vel_grad elem_vel_grad = DCArrayKokkos(num_elems, 3, 3); diff --git a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/sgh_optimization.cpp b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/sgh_optimization.cpp index 40d7c1169..c1c68234a 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/sgh_optimization.cpp +++ b/src/Parallel-Solvers/Parallel-Explicit/SGH_Solver/src/sgh_optimization.cpp @@ -563,10 +563,15 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPnum_dims; bool use_solve_checkpoints = simparam->optimization_options.use_solve_checkpoints; bool use_gradient_tally = simparam->optimization_options.use_gradient_tally; + const size_t num_lcs = module_params->loading_conditions.size(); real_t global_dt, current_time; size_t current_data_index, next_data_index; int print_cycle = simparam->dynamic_options.print_cycle; - + + double total_adjoint_time = Explicit_Solver_Pointer_->CPU_Time(); + double state_adjoint_time = 0; + double state_adjoint_time_start, state_adjoint_time_end; + auto optimization_objective_regions = simparam->optimization_options.optimization_objective_regions; //initialize tally storage of gradient vector cached_design_gradients_distributed->putScalar(0); // initialize first adjoint vector at last_time_step to 0 as the terminal value @@ -682,7 +687,6 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPCPU_Time(); // force_gradient_velocity->describe(*fos,Teuchos::VERB_EXTREME); const_vec_array previous_force_gradient_position = force_gradient_position->getLocalView(Tpetra::Access::ReadOnly); // const_vec_array current_force_gradient_position = force_gradient_position->getLocalView (Tpetra::Access::ReadOnly); @@ -862,10 +882,9 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPgetLocalView(Tpetra::Access::ReadWrite); vec_array phi_midpoint_adjoint_vector = phi_adjoint_vector_distributed->getLocalView(Tpetra::Access::ReadWrite); vec_array psi_midpoint_adjoint_vector = psi_adjoint_vector_distributed->getLocalView(Tpetra::Access::ReadWrite); - // half step update for RK2 scheme; EQUATION 1 - if(simparam->optimization_options.optimization_objective_regions.size()){ - int nobj_volumes = simparam->optimization_options.optimization_objective_regions.size(); + if(optimization_objective_regions.size()){ + int nobj_volumes = optimization_objective_regions.size(); const_vec_array all_initial_node_coords = all_initial_node_coords_distributed->getLocalView(Tpetra::Access::ReadOnly); FOR_ALL_CLASS(node_gid, 0, nlocal_nodes, { real_t rate_of_change; @@ -878,7 +897,7 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPoptimization_options.optimization_objective_regions(ivolume).contains(current_node_coords)){ + if(optimization_objective_regions(ivolume).contains(current_node_coords)){ contained = 1; } } @@ -1027,9 +1046,10 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPCPU_Time(); + // state_adjoint_time += state_adjoint_time_end-state_adjoint_time_start; // set state according to phase data at this timestep - get_vol(); // ---- Calculate velocity diveregence for the element ---- @@ -1115,6 +1135,21 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPCPU_Time(); // full step update with midpoint gradient for RK2 scheme; EQUATION 1 - if(simparam->optimization_options.optimization_objective_regions.size()){ - int nobj_volumes = simparam->optimization_options.optimization_objective_regions.size(); + if(optimization_objective_regions.size()){ + int nobj_volumes = optimization_objective_regions.size(); const_vec_array all_initial_node_coords = all_initial_node_coords_distributed->getLocalView(Tpetra::Access::ReadOnly); FOR_ALL_CLASS(node_gid, 0, nlocal_nodes, { real_t rate_of_change; @@ -1199,8 +1235,9 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPoptimization_options.optimization_objective_regions(ivolume).contains(current_node_coords)){ + if(optimization_objective_regions(ivolume).contains(current_node_coords)){ contained = 1; } } @@ -1316,6 +1353,9 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPassign(*all_adjoint_vector_distributed); (*psi_adjoint_vector_data)[cycle]->assign(*psi_adjoint_vector_distributed); } + + // state_adjoint_time_end = Explicit_Solver_Pointer_->CPU_Time(); + // state_adjoint_time += state_adjoint_time_end-state_adjoint_time_start; } // end view scope //tally contribution to the gradient vector @@ -1367,7 +1407,6 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPCPU_Time(); get_force_dgradient_sgh(material, *mesh, node_coords, @@ -1476,7 +1531,9 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPCPU_Time(); + //state_adjoint_time += state_adjoint_time_end-state_adjoint_time_start; compute_topology_optimization_gradient_tally(design_densities_distributed, cached_design_gradients_distributed, cycle, global_dt); } @@ -1489,6 +1546,10 @@ void FEA_Module_SGH::compute_topology_optimization_adjoint_full(Teuchos::RCPdescribe(*fos,Teuchos::VERB_EXTREME); } + + double total_adjoint_time_end = Explicit_Solver_Pointer_->CPU_Time(); + *fos << "ADJOINT CALCULATION TIME ON RANK " << myrank << " IS " << total_adjoint_time_end-total_adjoint_time << std::endl; + // std::cout << "ADJOINT STATE CALCULATION TIME ON RANK " << myrank << " IS " << state_adjoint_time << std::endl; } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize.h b/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize.h index cb369efe1..fc7937718 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize.h +++ b/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize.h @@ -53,12 +53,13 @@ #include "ROL_Types.hpp" #include #include "ROL_Objective.hpp" +#include "Fierro_Optimization_Objective.hpp" #include "ROL_Elementwise_Reduce.hpp" #include "FEA_Module_SGH.h" #include "FEA_Module_Dynamic_Elasticity.h" #include "Explicit_Solver.h" -class KineticEnergyMinimize_TopOpt : public ROL::Objective +class KineticEnergyMinimize_TopOpt : public FierroOptimizationObjective { typedef Tpetra::Map<>::local_ordinal_type LO; typedef Tpetra::Map<>::global_ordinal_type GO; @@ -99,6 +100,7 @@ typedef MV::dual_view_type dual_vec_array; real_t previous_objective_accumulation, objective_sign; bool useLC_; // Use linear form of energy. Otherwise use quadratic form. + bool first_init; //prevents ROL from calling init computation twice at start for the AL algorithm ///////////////////////////////////////////////////////////////////////////// /// @@ -133,19 +135,18 @@ typedef MV::dual_view_type dual_vec_array; } public: - bool nodal_density_flag_, time_accumulation; + bool nodal_density_flag_; int last_comm_step, last_solve_step, current_step; size_t nvalid_modules; std::vector valid_fea_modules; // modules that may interface with this objective function FEA_MODULE_TYPE set_module_type; // std::string my_fea_module = "SGH"; - real_t objective_accumulation; KineticEnergyMinimize_TopOpt(Explicit_Solver* Explicit_Solver_Pointer, bool nodal_density_flag) : useLC_(true) { Explicit_Solver_Pointer_ = Explicit_Solver_Pointer; - + first_init = false; valid_fea_modules.push_back(FEA_MODULE_TYPE::SGH); valid_fea_modules.push_back(FEA_MODULE_TYPE::Dynamic_Elasticity); nvalid_modules = valid_fea_modules.size(); @@ -167,7 +168,6 @@ typedef MV::dual_view_type dual_vec_array; last_comm_step = last_solve_step = -1; current_step = 0; time_accumulation = true; - objective_accumulation = 0; previous_gradients = Teuchos::rcp(new MV(Explicit_Solver_Pointer_->map, 1)); if(Explicit_Solver_Pointer_->simparam.optimization_options.maximize_flag){ @@ -211,14 +211,17 @@ typedef MV::dual_view_type dual_vec_array; const_host_vec_array design_densities = zp->getLocalView(Tpetra::Access::ReadOnly); if (type == ROL::UpdateType::Initial) { - // This is the first call to update - // first linear solve was done in FEA class run function already - FEM_Dynamic_Elasticity_->comm_variables(zp); - // update deformation variables - FEM_Dynamic_Elasticity_->update_forward_solve(zp); - // initial design density data was already communicated for ghost nodes in init_design() - // decide to output current optimization state - FEM_Dynamic_Elasticity_->Explicit_Solver_Pointer_->write_outputs(); + if(first_init){ + // This is the first call to update + // first linear solve was done in FEA class run function already + FEM_Dynamic_Elasticity_->comm_variables(zp); + // update deformation variables + FEM_Dynamic_Elasticity_->update_forward_solve(zp); + // initial design density data was already communicated for ghost nodes in init_design() + // decide to output current optimization state + FEM_Dynamic_Elasticity_->Explicit_Solver_Pointer_->write_outputs(); + } + first_init = true; } else if (type == ROL::UpdateType::Accept) { @@ -274,19 +277,25 @@ typedef MV::dual_view_type dual_vec_array; const_host_vec_array design_densities = zp->getLocalView(Tpetra::Access::ReadOnly); if (type == ROL::UpdateType::Initial) { - // This is the first call to update - if (Explicit_Solver_Pointer_->myrank == 0) { - *fos << "called SGH Initial" << std::endl; - } + if(first_init){ + // This is the first call to update + if (Explicit_Solver_Pointer_->myrank == 0) { + *fos << "called SGH Initial" << std::endl; + } - FEM_SGH_->comm_variables(zp); - FEM_SGH_->update_forward_solve(zp); - FEM_SGH_->compute_topology_optimization_adjoint_full(zp); - previous_objective_accumulation = objective_accumulation; - previous_gradients->assign(*(FEM_SGH_->cached_design_gradients_distributed)); - // initial design density data was already communicated for ghost nodes in init_design() - // decide to output current optimization state - // FEM_SGH_->Explicit_Solver_Pointer_->write_outputs(); + FEM_SGH_->comm_variables(zp); + FEM_SGH_->update_forward_solve(zp); + if(Explicit_Solver_Pointer_->myrank == 0){ + std::cout << "CURRENT TIME INTEGRAL OF KINETIC ENERGY " << objective_accumulation << std::endl; + } + FEM_SGH_->compute_topology_optimization_adjoint_full(zp); + previous_objective_accumulation = objective_accumulation; + previous_gradients->assign(*(FEM_SGH_->cached_design_gradients_distributed)); + // initial design density data was already communicated for ghost nodes in init_design() + // decide to output current optimization state + // FEM_SGH_->Explicit_Solver_Pointer_->write_outputs(); + } + first_init = true; } else if (type == ROL::UpdateType::Accept) { if (Explicit_Solver_Pointer_->myrank == 0) { @@ -305,7 +314,9 @@ typedef MV::dual_view_type dual_vec_array; if (Explicit_Solver_Pointer_->myrank == 0) { *fos << "called Revert" << std::endl; } objective_accumulation = previous_objective_accumulation; FEM_SGH_->cached_design_gradients_distributed->assign(*previous_gradients); - + if(Explicit_Solver_Pointer_->myrank == 0){ + std::cout << "CURRENT TIME INTEGRAL OF KINETIC ENERGY " << objective_accumulation << std::endl; + } // FEM_SGH_->comm_variables(zp); // // update deformation variables // FEM_SGH_->update_forward_solve(zp); @@ -324,8 +335,11 @@ typedef MV::dual_view_type dual_vec_array; FEM_SGH_->comm_variables(zp); // update deformation variables FEM_SGH_->update_forward_solve(zp, print_flag); + + if(Explicit_Solver_Pointer_->myrank == 0){ + std::cout << "CURRENT TIME INTEGRAL OF KINETIC ENERGY " << objective_accumulation << std::endl; + } FEM_SGH_->compute_topology_optimization_adjoint_full(zp); - // decide to output current optimization state // FEM_SGH_->Explicit_Solver_Pointer_->write_outputs(); } @@ -392,9 +406,6 @@ typedef MV::dual_view_type dual_vec_array; } std::cout.precision(10); - if (Explicit_Solver_Pointer_->myrank == 0) { - std::cout << "CURRENT TIME INTEGRAL OF KINETIC ENERGY " << objective_accumulation << std::endl; - } // std::cout << "Ended obj value on task " <myrank << std::endl; return objective_sign*objective_accumulation; diff --git a/src/Parallel-Solvers/Parallel-Explicit/optimization_parameters.xml b/src/Parallel-Solvers/Parallel-Explicit/optimization_parameters.xml index 7f2862aa5..028470728 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/optimization_parameters.xml +++ b/src/Parallel-Solvers/Parallel-Explicit/optimization_parameters.xml @@ -62,7 +62,7 @@ - + diff --git a/src/Parallel-Solvers/Simulation_Parameters/Geometry.h b/src/Parallel-Solvers/Simulation_Parameters/Geometry.h index dd36d171e..aa0e593ae 100644 --- a/src/Parallel-Solvers/Simulation_Parameters/Geometry.h +++ b/src/Parallel-Solvers/Simulation_Parameters/Geometry.h @@ -47,14 +47,6 @@ struct Volume : Yaml::ValidatedYaml { double length_x = 0; double length_y = 0; double length_z = 0; - double i0_real; - double j0_real; - double k0_real; - int i0; - int j0; - int k0; - int elem_id0; - bool fill_this; // Run voxelization scheme on stl file KOKKOS_FUNCTION @@ -68,9 +60,17 @@ struct Volume : Yaml::ValidatedYaml { } KOKKOS_FUNCTION - bool contains(const double* elem_coords) { + bool contains(const double* elem_coords) const { double radius; - + bool fill_this; + double i0_real; + double j0_real; + double k0_real; + int i0; + int j0; + int k0; + int elem_id0; + switch(type) { case VOLUME_TYPE::global: return true; diff --git a/src/Parallel-Solvers/Simulation_Parameters/Optimization_Options.h b/src/Parallel-Solvers/Simulation_Parameters/Optimization_Options.h index 3d65fbfb5..fa4b259ea 100644 --- a/src/Parallel-Solvers/Simulation_Parameters/Optimization_Options.h +++ b/src/Parallel-Solvers/Simulation_Parameters/Optimization_Options.h @@ -175,6 +175,10 @@ struct Optimization_Options: Yaml::DerivedFields { if(use_solve_checkpoints){ use_gradient_tally = true; } + + if(retain_outer_shell&&variable_outer_shell){ + throw Yaml::ConfigurationException("Cannot specify both retain_outer_shell and variable_outer_shell as true"); + } } }; YAML_ADD_REQUIRED_FIELDS_FOR(Optimization_Options,