From 9076034ec594e3830bcecc9d334a6920ce096aad Mon Sep 17 00:00:00 2001 From: Daniel Holladay Date: Tue, 27 Jul 2021 11:45:57 -0600 Subject: [PATCH 1/4] Adding optimized log implementation using frexp and approximation to log on [0.5,1) with the help of Jonah. --- utils/fast-math/logs.hpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/utils/fast-math/logs.hpp b/utils/fast-math/logs.hpp index 90b54f8b9..50e705695 100644 --- a/utils/fast-math/logs.hpp +++ b/utils/fast-math/logs.hpp @@ -31,14 +31,39 @@ namespace BDMath { - PORTABLE_INLINE_FUNCTION + PORTABLE_FORCEINLINE_FUNCTION float log10(const float x) { // const double ILOG10 = 1./std::log(10.0); constexpr double ILOG10 = 0.43429448190325176; #if BD_USE_FMATH return fmath::log(x)*ILOG10; #else - return logf(x)*ILOG10; + int n{}; + constexpr const double LOG2OLOG10 = 0.301029995663981195; + const float y = frexpf(x, &n); + // 7the order approximation to log(x) x in [0.5, 1.0) +// const float expr = -2.808428971 + y*( +// 8.648808309 + y*( +// -15.910426569 + y*( +// 21.53943522 + y*( +// -19.56870772 + y*( +// 11.318698784 + ( +// -3.770730277 + 0.5513512194*y)*y))))); + // 5th order approximation to log(x) x in [0.5, 1.0) +// const float expr = -2.5396743497743 + y*( +// 6.420250933346 + y*( +// -8.150589926467 + y*( +// 6.830564334687 + ( +// -3.192132453902 + 0.6315814621098*y)*y))); + // 4th order approximation to log(x) x in [0.5, 1.0) + const float expr = -2.36697629372863 + y*( + 5.2672858163176 + y*( + -5.1242620871906 + ( + 2.91607506431871 - 0.69212249971711*y)*y)); + // log10 x using frexp + return ILOG10*expr + n*LOG2OLOG10; + // true log 10 x +// return logf(x)*ILOG10; #endif } From b4ab13633e7a04cffc2711a4444e8445bd5140df Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 28 Jul 2021 10:59:11 -0600 Subject: [PATCH 2/4] update --- .gitmodules | 3 -- CMakeLists.txt | 25 +++++++++-- sesame2spiner/examples/airtest.json | 32 ++++++++++++++ sesame2spiner/io_eospac.hpp | 6 +-- singularity-eos/eos/eos.hpp | 6 +-- utils/fast-math/logs.hpp | 67 ++++++++++++++--------------- utils/herumi-fmath | 1 - 7 files changed, 91 insertions(+), 49 deletions(-) create mode 100644 sesame2spiner/examples/airtest.json delete mode 160000 utils/herumi-fmath diff --git a/.gitmodules b/.gitmodules index 291e36127..07fa01dd9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "utils/variant"] path = utils/variant url = https://github.com/mpark/variant.git -[submodule "utils/herumi-fmath"] - path = utils/herumi-fmath - url = https://github.com/herumi/fmath.git [submodule "utils/kokkos"] path = utils/kokkos url = https://github.com/kokkos/kokkos.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a59f0c95..b2f9b6d3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,10 @@ option (SINGULARITY_BUILD_CLOSURE "Mixed cell closure" ON) option (SINGULARITY_TEST_SESAME "Test the Sesame table readers" OFF) option (SINGULARITY_TEST_STELLAR_COLLAPSE "Test the stellar collapse table readers" OFF) option (SINGULARITY_EOS_SKIP_EXTRAP "Turn off eospac extrap checks" OFF) +option (SINGULARITY_USE_ACCURATE_LOGS "Swap the fast logs for accurate ones" OFF) +option (SINGULARITY_FMATH_USE_ORDER_4 "4th order interpolant for fast logs. Default is 7th order." OFF) +option (SINGULARITY_FMATH_USE_ORDER_5 "5th order interpolant for fast logs. Default is 7th order." OFF) + if (SINGULARITY_SUBMODULE_MODE) set(SINGULARITY_BETTER_DEBUG_FLAGS OFF CACHE BOOL "" FORCE) set(SINGULARITY_HIDE_MORE_WARNINGS ON CACHE BOOL "" FORCE) @@ -280,6 +284,23 @@ if (SINGULARITY_USE_EOSPAC) target_link_libraries(singularity-eos::libs INTERFACE EOSPAC::eospac) endif () +# fast math +if (SINGULARITY_USE_ACCURATE_LOGS) + target_compile_definitions(singularity-eos::flags INTERFACE + SINGULAIRTY_USE_ACCUARTE_LOGS) +endif() +if (SINGULARITY_FMATH_USE_ORDER_4 AND SINGULARITY_FMATH_USE_ORDER_5) + message(ERROR "Order 4 and order 5 interpolation both specified. Please specify only one or zero. If no specification is made, order 7 will be used.") +endif() +if (SINGULARITY_FMATH_USE_ORDER_4) + target_compile_definitions(singularity-eos::flags INTERFACE + SINGULAIRTY_FMATH_USE_ORDER_4) +endif() +if (SINGULARITY_FMATH_USE_ORDER_5) + target_compile_definitions(singularity-eos::flags INTERFACE + SINGULAIRTY_FMATH_USE_ORDER_5) +endif() + # required package ports of call find_package(PortsofCall REQUIRED) target_link_libraries(singularity-eos::flags INTERFACE PortsofCall::PortsofCall) @@ -348,10 +369,6 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/utils/fast-math" DESTINATION "include" FILES_MATCHING PATTERN "*.hpp" ) -install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/utils/herumi-fmath" - DESTINATION "include" - FILES_MATCHING PATTERN "*.hpp" - ) install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/utils/root-finding-1d" DESTINATION "include" FILES_MATCHING PATTERN "*.hpp" diff --git a/sesame2spiner/examples/airtest.json b/sesame2spiner/examples/airtest.json new file mode 100644 index 000000000..0e9d75911 --- /dev/null +++ b/sesame2spiner/examples/airtest.json @@ -0,0 +1,32 @@ +//====================================================================== +// Example input deck for sesame2spiner, +// a tool for converting eospac to spiner +// Author: Jonah Miller (jonahm@lanl.gov) +// © 2021. Triad National Security, LLC. All rights reserved. This +// program was produced under U.S. Government contract 89233218CNA000001 +// for Los Alamos National Laboratory (LANL), which is operated by Triad +// National Security, LLC for the U.S. Department of Energy/National +// Nuclear Security Administration. All rights in the program are +// reserved by Triad National Security, LLC, and the U.S. Department of +// Energy/National Nuclear Security Administration. The Government is +// granted for itself and others acting on its behalf a nonexclusive, +// paid-up, irrevocable worldwide license in this material to reproduce, +// prepare derivative works, distribute copies to the public, perform +// publicly and display publicly, and to permit others to do so. +//====================================================================== +{ + "savename" : "materials.sp5", + "materials" : [ + { + "matid" : 5030, + "name" : "air", + /* These set the number of grid points per decade + for each variable. The default is 20 points + per decade. + */ + "numrho/decade" : 40, + "numT/decade" : 40, + "numSie/decade" : 40 + } + ] +} diff --git a/sesame2spiner/io_eospac.hpp b/sesame2spiner/io_eospac.hpp index c85ff8246..ffd12faca 100644 --- a/sesame2spiner/io_eospac.hpp +++ b/sesame2spiner/io_eospac.hpp @@ -87,15 +87,15 @@ class Bounds { // min); // if (min <= 0) min = std::abs(10*Spiner::EPS); // if (min <= 0) min = std::abs(Spiner::EPS); - min = BDMath::log10(std::abs(min)); // fast, floating-point log - max = BDMath::log10(std::abs(max)); + min = std::log10(std::abs(min)); // fast, floating-point log + max = std::log10(std::abs(max)); Real delta = max - min; min += 0.5*shrinkRange*delta; max -= 0.5*shrinkRange*delta; if (!(std::isnan(anchor_point))) { anchor_point += offset; - anchor_point = BDMath::log10(std::abs(anchor_point)); + anchor_point = std::log10(std::abs(anchor_point)); } } diff --git a/singularity-eos/eos/eos.hpp b/singularity-eos/eos/eos.hpp index 853adcf8f..5140aef8c 100644 --- a/singularity-eos/eos/eos.hpp +++ b/singularity-eos/eos/eos.hpp @@ -869,7 +869,7 @@ class SpinerEOSDependsRhoT { toLog_(const Real x, const Real offset) const noexcept { // return std::log10(x + offset + EPS); // return std::log10(std::abs(std::max(x,-offset) + offset)+EPS); - return BDMath::log10(std::abs(std::max(x, -offset) + offset) + EPS); + return Math::log10(std::abs(std::max(x, -offset) + offset) + EPS); } PORTABLE_INLINE_FUNCTION Real __attribute__((always_inline)) fromLog_(const Real lx, const Real offset) const noexcept { @@ -1076,7 +1076,7 @@ class SpinerEOSDependsRhoSie { PORTABLE_INLINE_FUNCTION Real toLog_(const Real x, const Real offset) const { // return std::log10(std::abs(std::max(x,-offset) + offset)+EPS); - return BDMath::log10(std::abs(std::max(x, -offset) + offset) + EPS); + return Math::log10(std::abs(std::max(x, -offset) + offset) + EPS); } PORTABLE_INLINE_FUNCTION Real fromLog_(const Real lx, const Real offset) const { @@ -1262,7 +1262,7 @@ class StellarCollapse { toLog_(const Real x, const Real offset) const noexcept { // return std::log10(x + offset + EPS); // return std::log10(std::abs(std::max(x,-offset) + offset)+EPS); - return BDMath::log10(std::abs(std::max(x, -offset) + offset) + EPS); + return Math::log10(std::abs(std::max(x, -offset) + offset) + EPS); } PORTABLE_INLINE_FUNCTION Real __attribute__((always_inline)) fromLog_(const Real lx, const Real offset) const noexcept { diff --git a/utils/fast-math/logs.hpp b/utils/fast-math/logs.hpp index 50e705695..07ff13ebc 100644 --- a/utils/fast-math/logs.hpp +++ b/utils/fast-math/logs.hpp @@ -19,55 +19,52 @@ #include #include -// herumi-fmath does not work on device -// On CPUS it provides another 10% or so speedup -// for negligible cost in accuracy -#ifdef PORTABILITY_STRATEGY_KOKKOS -#define BD_USE_FMATH 0 -#else -#include -#define BD_USE_FMATH 1 -#endif -namespace BDMath { + +namespace singularity { +namespace Math { PORTABLE_FORCEINLINE_FUNCTION - float log10(const float x) { + double log10(const double x) { + +#ifndef SINGULARITY_USE_ACCURATE_LOGS // const double ILOG10 = 1./std::log(10.0); constexpr double ILOG10 = 0.43429448190325176; - #if BD_USE_FMATH - return fmath::log(x)*ILOG10; - #else + int n{}; constexpr const double LOG2OLOG10 = 0.301029995663981195; - const float y = frexpf(x, &n); - // 7the order approximation to log(x) x in [0.5, 1.0) -// const float expr = -2.808428971 + y*( -// 8.648808309 + y*( -// -15.910426569 + y*( -// 21.53943522 + y*( -// -19.56870772 + y*( -// 11.318698784 + ( -// -3.770730277 + 0.5513512194*y)*y))))); - // 5th order approximation to log(x) x in [0.5, 1.0) -// const float expr = -2.5396743497743 + y*( -// 6.420250933346 + y*( -// -8.150589926467 + y*( -// 6.830564334687 + ( -// -3.192132453902 + 0.6315814621098*y)*y))); + const double y = frexp(x, &n); +#ifdef SINGULARITY_FMATH_USE_ORDER_4 // 4th order approximation to log(x) x in [0.5, 1.0) - const float expr = -2.36697629372863 + y*( + const double expr = -2.36697629372863 + y*( 5.2672858163176 + y*( -5.1242620871906 + ( 2.91607506431871 - 0.69212249971711*y)*y)); +#elif defined(SINGULARITY_FMATH_USE_ORDER_5) + // 5th order approximation to log(x) x in [0.5, 1.0) + const double expr = -2.5396743497743 + y*( + 6.420250933346 + y*( + -8.150589926467 + y*( + 6.830564334687 + ( + -3.192132453902 + 0.6315814621098*y)*y))); +#else + // 7the order approximation to log(x) x in [0.5, 1.0) + const double expr = -2.808428971 + y*( + 8.648808309 + y*( + -15.910426569 + y*( + 21.53943522 + y*( + -19.56870772 + y*( + 11.318698784 + ( + -3.770730277 + 0.5513512194*y)*y))))); +#endif // SINGULARITY_FMATH_USE_ORDER // log10 x using frexp return ILOG10*expr + n*LOG2OLOG10; - // true log 10 x -// return logf(x)*ILOG10; - #endif +#else + return std::log10(x); // SINGULARITY_USE_ACCURATE_LOGS +#endif } -} // BDMath +} // Math +} // singularity -#undef BD_USE_FMATH #endif // _SINGULARITY_EOS_UTILS_FAST_MATH_LOGS_ diff --git a/utils/herumi-fmath b/utils/herumi-fmath deleted file mode 160000 index 168ede1bc..000000000 --- a/utils/herumi-fmath +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 168ede1bc4a822b9ee2d584a6e20d2ba409d435a From b61e2f21c7cfb24e4e2ec109a0dcff1eed18e6de Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 28 Jul 2021 18:09:50 -0600 Subject: [PATCH 3/4] fixed sesame2spiner to produce a better gridding --- CMakeLists.txt | 6 +++--- sesame2spiner/generate_files.cpp | 9 +++++++++ sesame2spiner/generate_files.hpp | 1 + sesame2spiner/io_eospac.hpp | 11 +++-------- utils/fast-math/logs.hpp | 14 +++++++------- 5 files changed, 23 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b2f9b6d3a..325fa288f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,7 @@ option (SINGULARITY_BUILD_CLOSURE "Mixed cell closure" ON) option (SINGULARITY_TEST_SESAME "Test the Sesame table readers" OFF) option (SINGULARITY_TEST_STELLAR_COLLAPSE "Test the stellar collapse table readers" OFF) option (SINGULARITY_EOS_SKIP_EXTRAP "Turn off eospac extrap checks" OFF) -option (SINGULARITY_USE_ACCURATE_LOGS "Swap the fast logs for accurate ones" OFF) +option (SINGULARITY_USE_SINGLE_LOGS "Use single precision logs. Can harm accuracy." OFF) option (SINGULARITY_FMATH_USE_ORDER_4 "4th order interpolant for fast logs. Default is 7th order." OFF) option (SINGULARITY_FMATH_USE_ORDER_5 "5th order interpolant for fast logs. Default is 7th order." OFF) @@ -285,9 +285,9 @@ if (SINGULARITY_USE_EOSPAC) endif () # fast math -if (SINGULARITY_USE_ACCURATE_LOGS) +if (SINGULARITY_USE_SINGLE_LOGS) target_compile_definitions(singularity-eos::flags INTERFACE - SINGULAIRTY_USE_ACCUARTE_LOGS) + SINGULARITY_USE_SINGLE_LOGS) endif() if (SINGULARITY_FMATH_USE_ORDER_4 AND SINGULARITY_FMATH_USE_ORDER_5) message(ERROR "Order 4 and order 5 interpolation both specified. Please specify only one or zero. If no specification is made, order 7 will be used.") diff --git a/sesame2spiner/generate_files.cpp b/sesame2spiner/generate_files.cpp index 1c726d718..f2970b61b 100644 --- a/sesame2spiner/generate_files.cpp +++ b/sesame2spiner/generate_files.cpp @@ -302,6 +302,15 @@ void getMatBounds(int i, Real rhoAnchor = metadata.normalDensity; Real TAnchor = 298.15; + // Forces density and temperature to be in a region where an offset + // is not needed. This improves resolution at low densities and + // temperatures. + + // Extrapolation and other resolution tricks will be explored in the + // future. + if (rhoMin < STRICTLY_POS_MIN) rhoMin = STRICTLY_POS_MIN; + if (TMin < STRICTLY_POS_MIN) TMin = STRICTLY_POS_MIN; + lRhoBounds = Bounds(rhoMin,rhoMax,numRho,true,shrinklRhoBounds,rhoAnchor); lTBounds = Bounds(TMin,TMax,numT,true,shrinklTBounds,TAnchor); leBounds = Bounds(sieMin,sieMax,numSie,true,shrinkleBounds); diff --git a/sesame2spiner/generate_files.hpp b/sesame2spiner/generate_files.hpp index f0239633a..b4cac4f4d 100644 --- a/sesame2spiner/generate_files.hpp +++ b/sesame2spiner/generate_files.hpp @@ -27,6 +27,7 @@ using nlohmann::json; constexpr int PPD_DEFAULT = 50; +constexpr Real STRICTLY_POS_MIN = 1e-9; herr_t saveMaterial(hid_t loc, const SesameMetadata& metadata, diff --git a/sesame2spiner/io_eospac.hpp b/sesame2spiner/io_eospac.hpp index ffd12faca..fe4218663 100644 --- a/sesame2spiner/io_eospac.hpp +++ b/sesame2spiner/io_eospac.hpp @@ -74,20 +74,15 @@ class Bounds { if (convertToLog) { // should be single-precision epsilon b/c that's whats used for the logs constexpr Real epsilon = std::numeric_limits::epsilon(); - const Real min_offset = std::max(10*std::abs(epsilon), - std::abs(epsilon*max)); + const Real min_offset = 10*std::abs(epsilon); if (min < 0) offset = std::abs(min) + min_offset; else if ( min == 0 ) { offset = min_offset; } min += offset; max += offset; - // min = std::max(std::max(10*std::abs(Spiner::EPS), - // std::abs(Spiner::EPS*max)), - // min); - // if (min <= 0) min = std::abs(10*Spiner::EPS); - // if (min <= 0) min = std::abs(Spiner::EPS); - min = std::log10(std::abs(min)); // fast, floating-point log + + min = std::log10(std::abs(min)); max = std::log10(std::abs(max)); Real delta = max - min; min += 0.5*shrinkRange*delta; diff --git a/utils/fast-math/logs.hpp b/utils/fast-math/logs.hpp index 07ff13ebc..8354fd3cf 100644 --- a/utils/fast-math/logs.hpp +++ b/utils/fast-math/logs.hpp @@ -23,17 +23,20 @@ namespace singularity { namespace Math { - + PORTABLE_FORCEINLINE_FUNCTION double log10(const double x) { - -#ifndef SINGULARITY_USE_ACCURATE_LOGS + // const double ILOG10 = 1./std::log(10.0); constexpr double ILOG10 = 0.43429448190325176; int n{}; constexpr const double LOG2OLOG10 = 0.301029995663981195; - const double y = frexp(x, &n); +#ifndef SINGULARITY_USE_SINGLE_LOGS + const double y = frexp(x, &n); // default is double precision +#else + const float y = frexpf((float)x, &n); // faster but less accurate +#endif #ifdef SINGULARITY_FMATH_USE_ORDER_4 // 4th order approximation to log(x) x in [0.5, 1.0) const double expr = -2.36697629372863 + y*( @@ -59,9 +62,6 @@ namespace Math { #endif // SINGULARITY_FMATH_USE_ORDER // log10 x using frexp return ILOG10*expr + n*LOG2OLOG10; -#else - return std::log10(x); // SINGULARITY_USE_ACCURATE_LOGS -#endif } } // Math From 3fb71907fea35843d70b98c349bb1744d97d4c9f Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 28 Jul 2021 18:31:24 -0600 Subject: [PATCH 4/4] oops fix profile stellar collapse --- sesame2spiner/examples/airtest.json | 2 +- sesame2spiner/examples/materials.json | 2 +- sesame2spiner/parser.hpp | 2 +- test/profile_stellar_collapse.cpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sesame2spiner/examples/airtest.json b/sesame2spiner/examples/airtest.json index 0e9d75911..0c2db623e 100644 --- a/sesame2spiner/examples/airtest.json +++ b/sesame2spiner/examples/airtest.json @@ -21,7 +21,7 @@ "matid" : 5030, "name" : "air", /* These set the number of grid points per decade - for each variable. The default is 20 points + for each variable. The default is 50 points per decade. */ "numrho/decade" : 40, diff --git a/sesame2spiner/examples/materials.json b/sesame2spiner/examples/materials.json index d7333dd99..5299d1f3f 100644 --- a/sesame2spiner/examples/materials.json +++ b/sesame2spiner/examples/materials.json @@ -34,7 +34,7 @@ "matid" : 2961, "name" : "titanium", /* These set the number of grid points per decade - for each variable. The default is 20 points + for each variable. The default is 50 points per decade. */ "numrho/decade" : 30, diff --git a/sesame2spiner/parser.hpp b/sesame2spiner/parser.hpp index a94d73849..ee88d4a0c 100644 --- a/sesame2spiner/parser.hpp +++ b/sesame2spiner/parser.hpp @@ -42,7 +42,7 @@ const std::string EXAMPLESTRING = R"( "matid" : 2961, "name" : "titanium", /* These set the number of grid points per decade - for each variable. The default is 20 points + for each variable. The default is 50 points per decade. */ "numrho/decade" : 30, diff --git a/test/profile_stellar_collapse.cpp b/test/profile_stellar_collapse.cpp index 3d935690c..997637970 100644 --- a/test/profile_stellar_collapse.cpp +++ b/test/profile_stellar_collapse.cpp @@ -63,8 +63,8 @@ struct Bounds { } min += offset; max += offset; - min = BDMath::log10(std::abs(min)); // fast, floating-point log - max = BDMath::log10(std::abs(max)); + min = std::log10(std::abs(min)); // fast, floating-point log + max = std::log10(std::abs(max)); } grid = RegularGrid1D(min, max, N); }