From 10525990397beff5f822f5897dcf84215885c966 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ervin=20Heged=C3=BCs?=
Date: Fri, 14 Aug 2020 12:20:43 +0000
Subject: [PATCH] Aligned v3 behavior to the fixed version

msc3 now calls Regex::searchOneMatch() by default and collects the
result in a vector of SMatchCapture (group, offset, length). The -f
option selects the previous Regex::searchAll() path instead. The debug
output prints the captures and the offset vector of whichever path ran.

searchOneMatch() appends to the vector it is given, so the captures
vector is cleared at the start of every -n iteration, before the clock
is started. Without this the debug output would list each capture once
per iteration.
---
 CHANGES      |  1 +
 src/msc3.cc  | 70 +++++++++++++++++++++++++++++++++++----------------
 src/regex.cc | 19 +++++++++++++++
 src/regex.h  | 13 ++++++++++
 4 files changed, 81 insertions(+), 22 deletions(-)

diff --git a/CHANGES b/CHANGES
index a44bda5..f5c94db 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,7 @@ v0.3 - YYYY-MM-DD
 -----------------
  * refactoring regex.cc for pcre4msc3
  * add `-f` argument for pcre4msc3
+ * aligned v3 behavior to the fix (#2348)
 
 v0.2 - 2020-04-02
 -----------------
diff --git a/src/msc3.cc b/src/msc3.cc
index c536efb..a2cad2e 100644
--- a/src/msc3.cc
+++ b/src/msc3.cc
@@ -12,7 +12,7 @@ void showhelp(char * name) {
     std::cout << "OPTIONS:" << std::endl;
     std::cout << "\t-h\tThis help" << std::endl;
     std::cout << "\t-n N\titerate pcre_regex as Nth times. Default value is 1." << std::endl;
-    std::cout << "\t-f\tForce to use modified regex matching method." << std::endl;
+    std::cout << "\t-f\tForce to use old v3 regex matching method." << std::endl;
     std::cout << "\t-t T\tExpects a float value; if the (last) pcre_exec time is greather than this," << std::endl;
     std::cout << "\t \tthe exit status of program will non-zero." << std::endl;
     std::cout << "\t-d \tShow debug information." << std::endl;
@@ -25,7 +25,7 @@ int main(int argc, char ** argv) {
     char * patternfile = NULL, * subjectfile = NULL;
     char c;
     int icnt = 1, rc = 0;
-    bool use_fixed = false;
+    bool use_old = false;
     float time_limit = 0.0;
     double m_sub = 0.0;
     int debuglevel = 0; // may be later we can use different level...
@@ -41,7 +41,7 @@ int main(int argc, char ** argv) {
                 showhelp(argv[0]);
                 return EXIT_SUCCESS;
             case 'f':
-                use_fixed = true;
+                use_old = true;
                 break;
             case 'n':
                 icnt = atoi(optarg);
@@ -119,18 +119,19 @@ int main(int argc, char ** argv) {
     re = new Regex(pattern, debuglevel);
 
     std::list<SMatch> retval;
+    std::vector<SMatchCapture> captures;
 
     for(int i = 0; i < icnt; i++) {
 
         re->m_retList.clear();
+        captures.clear();
 
         clock_t m_start = clock();
 
-        if (use_fixed == false) {
-            retval = re->searchAll(subject);
+        if (use_old == false) {
+            re->searchOneMatch(subject, captures);
         } else {
-            rc = re->searchAll2(subject, ((debuglevel == 1) ? 10 : 0));
-            retval = re->m_retList;
+            retval = re->searchAll(subject);
         }
         clock_t m_end = clock();
         m_sub = (m_end - m_start) / double(CLOCKS_PER_SEC);
@@ -148,26 +149,51 @@ int main(int argc, char ** argv) {
     // show captured substrings if debug was set
     if (debuglevel == 1) {
         debugvalue(debuglevel, "CAPTURES", "");
-        retval.reverse();
-        for(auto s: retval) {
-            std::string subpatt = "";
-            if (s.offset() > 0) {
-                subpatt += subject.substr(0, s.offset());
+        if (use_old == false) {
+            for (const SMatchCapture& capture : captures) {
+                const std::string capture_substring(subject.substr(capture.m_offset, capture.m_length));
+                std::string subpatt = "";
+                if (capture.m_offset > 0) {
+                    subpatt += subject.substr(0, capture.m_offset);
+                }
+                subpatt += BOLDGREEN + capture_substring + RESET;
+                if (capture.m_offset + capture_substring.size() < subject.size()) {
+                    subpatt += subject.substr(capture.m_offset + capture_substring.size());
+                }
+                std::cout << subpatt << std::endl;
             }
-            subpatt += BOLDGREEN + s.str() + RESET;
-            if (s.offset() + s.str().size() < subject.size()) {
-                subpatt += subject.substr(s.offset() + s.str().size());
+
+            debugvalue(debuglevel, "OVECTOR", "");
+            std::cout << "[";
+            size_t si = 0;
+            for(auto capture: captures) {
+                const std::string capture_substring(subject.substr(capture.m_offset, capture.m_length));
+                std::cout << capture.m_offset << ", " << capture.m_offset + capture_substring.size() << ((si++ < captures.size()-1) ? ", " : "");
             }
-            std::cout << subpatt << std::endl;
+            std::cout << "]" << std::endl;
         }
+        else {
+            retval.reverse();
+            for(auto s: retval) {
+                std::string subpatt = "";
+                if (s.offset() > 0) {
+                    subpatt += subject.substr(0, s.offset());
+                }
+                subpatt += BOLDGREEN + s.str() + RESET;
+                if (s.offset() + s.str().size() < subject.size()) {
+                    subpatt += subject.substr(s.offset() + s.str().size());
+                }
+                std::cout << subpatt << std::endl;
+            }
 
-        debugvalue(debuglevel, "OVECTOR", "");
-        std::cout << "[";
-        size_t si = 0;
-        for(auto s: retval) {
-            std::cout << s.offset() << ", " << s.offset() + s.str().size() << ((si++ < retval.size()-1) ? ", " : "");
+            debugvalue(debuglevel, "OVECTOR", "");
+            std::cout << "[";
+            size_t si = 0;
+            for(auto s: retval) {
+                std::cout << s.offset() << ", " << s.offset() + s.str().size() << ((si++ < retval.size()-1) ? ", " : "");
+            }
+            std::cout << "]" << std::endl;
         }
-        std::cout << "]" << std::endl;
     }
     // end debug
 
diff --git a/src/regex.cc b/src/regex.cc
index 6e4e560..917cb99 100644
--- a/src/regex.cc
+++ b/src/regex.cc
@@ -68,6 +68,25 @@ Regex::~Regex() {
     }
 }
 
+bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const {
+    const char *subject = s.c_str();
+    int ovector[OVECCOUNT];
+
+    int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT);
+
+    for (int i = 0; i < rc; i++) {
+        size_t start = ovector[2*i];
+        size_t end = ovector[2*i+1];
+        size_t len = end - start;
+        if (end > s.size()) {
+            continue;
+        }
+        SMatchCapture capture(i, start, len);
+        captures.push_back(capture);
+    }
+
+    return (rc > 0);
+}
 
 std::list<SMatch> Regex::searchAll(const std::string& s) {
     const char *subject = s.c_str();
diff --git a/src/regex.h b/src/regex.h
index ae7221b..ffd66af 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include <vector>
 
 #include "regexutils.h"
 
@@ -36,6 +37,17 @@ class SMatch {
     size_t m_offset;
 };
 
+struct SMatchCapture {
+    SMatchCapture(size_t group, size_t offset, size_t length) :
+        m_group(group),
+        m_offset(offset),
+        m_length(length) { }
+
+    size_t m_group; // E.g. 0 = full match; 6 = capture group 6
+    size_t m_offset; // offset of match within the analyzed string
+    size_t m_length;
+};
+
 class Regex {
  public:
     explicit Regex(const std::string& pattern_, int debuglevel);
@@ -52,6 +64,7 @@ class Regex {
     std::list<SMatch> m_retList;
 
     std::list<SMatch> searchAll(const std::string& s);
+    bool searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const;
     int searchAll2(const std::string& s, size_t capturelen);
 };
 