From 305eab713a7f0404e62a5dab7246e1293ac879cd Mon Sep 17 00:00:00 2001 From: Metin Cakircali Date: Fri, 16 Aug 2024 23:34:08 +0200 Subject: [PATCH] perf(Time): try to avoid std::regex --- src/eckit/types/Time.cc | 151 +++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 62 deletions(-) diff --git a/src/eckit/types/Time.cc b/src/eckit/types/Time.cc index 402253f22..4be3b6b92 100644 --- a/src/eckit/types/Time.cc +++ b/src/eckit/types/Time.cc @@ -8,34 +8,64 @@ * does it submit to any jurisdiction. */ -#include +#include #include +#include +#include +#include +#include -#include "eckit/eckit.h" - +#include "eckit/exception/Exceptions.h" #include "eckit/persist/DumpLoad.h" #include "eckit/types/Time.h" #include "eckit/utils/Hash.h" -#include "eckit/utils/Tokenizer.h" namespace { - static thread_local std::regex digits_time_("^-?[0-9]+$"); - static thread_local std::regex float_hours_("^-?[0-9]*\\.[0-9]+$"); - static thread_local std::regex hhmmss_("^([0-9]+):([0-5]?[0-9])(:[0-5]?[0-9])?$"); - static thread_local std::regex ddhhmmss_("^-?([0-9]+[dD])?([0-9]+[hH])?([0-9]+[mM])?([0-9]+[sS])?$"); -} - -namespace eckit { //---------------------------------------------------------------------------------------------------------------------- -inline void printTime(std::ostream& s, long n) { +const std::regex hhmmss_("^([0-9]+):([0-5]?[0-9])(:[0-5]?[0-9])?$"); +const std::regex ddhhmmss_("^-?([0-9]+[dD])?([0-9]+[hH])?([0-9]+[mM])?([0-9]+[sS])?$"); + +// DIGITS: "^-?[0-9]+$" +// FLOAT: "^-?[0-9]*\\.[0-9]+$" +enum class TimeFormat { UNKOWN, OTHER, DIGITS, DECIMAL }; + +TimeFormat checkTimeFormat(const std::string_view time) { + bool hasDigit = false; + bool hasDecimal = false; + + const std::size_t start = (time[0] == '-') ? 1 : 0; + + for (auto i = start; i < time.length(); i++) { + if (time[i] == '.') { + if (hasDecimal || i == time.length() - 1) { return TimeFormat::UNKOWN; } + hasDecimal = true; + } else if (isdigit(time[i]) == 0) { + return TimeFormat::OTHER; + } else { + hasDigit = true; + } + } + + if (!hasDigit) { return TimeFormat::UNKOWN; } + + return hasDecimal ? TimeFormat::DECIMAL : TimeFormat::DIGITS; +} + +void printTime(std::ostream& s, long n) { if (n < 10) { s << '0'; } s << n; } +} // namespace + +//---------------------------------------------------------------------------------------------------------------------- + +namespace eckit { + Time::Time(long seconds, bool extended) : seconds_(static_cast(seconds)) { if ((seconds >= 86400 && !extended) || seconds < 0) { @@ -50,9 +80,10 @@ Time::Time(const std::string& s, bool extended) { long mm = 0; long hh = 0; long dd = 0; - std::smatch m; - - if (std::regex_match (s, m, digits_time_)) { + + const auto format = checkTimeFormat(s); + + if (format == TimeFormat::DIGITS) { long t = std::stol(s); int sign = (s[0] == '-' ? 1 : 0); if (extended || s.length() <= 2+sign) { // cases: h, hh, (or hhh..h for step parsing) @@ -67,60 +98,56 @@ Time::Time(const std::string& s, bool extended) { ss = t % 100; } } - } - else { - if (std::regex_match (s, m, float_hours_)) { - long sec = std::round(std::stod(s)*3600); - hh = sec/3600; - sec -= hh*3600; - mm = sec/60; - sec -= mm*60; - ss = sec; - } - else { - if (std::regex_match (s, m, hhmmss_)) { - for (int i=1; i= 60 || ss >= 60 || (!extended && (hh >= 24 || dd > 0 || hh < 0 || mm < 0 || ss < 0))) {