Skip to content

Commit

Permalink
Aligned v3 behavior to the fixed version
Browse files Browse the repository at this point in the history
  • Loading branch information
airween committed Aug 14, 2020
1 parent 1b7f23c commit 1052599
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ v0.3 - YYYY-MM-DD
-----------------
* refactoring regex.cc for pcre4msc3
* add `-f` argument for pcre4msc3
* aligned v3 behavior to the fix (#2348)

v0.2 - 2020-04-02
-----------------
Expand Down
69 changes: 47 additions & 22 deletions src/msc3.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ void showhelp(char * name) {
std::cout << "OPTIONS:" << std::endl;
std::cout << "\t-h\tThis help" << std::endl;
std::cout << "\t-n N\titerate pcre_regex as Nth times. Default value is 1." << std::endl;
std::cout << "\t-f\tForce to use modified regex matching method." << std::endl;
std::cout << "\t-f\tForce to use old v3 regex matching method." << std::endl;
std::cout << "\t-t T\tExpects a float value; if the (last) pcre_exec time is greather than this," << std::endl;
std::cout << "\t \tthe exit status of program will non-zero." << std::endl;
std::cout << "\t-d \tShow debug information." << std::endl;
Expand All @@ -25,7 +25,7 @@ int main(int argc, char ** argv) {
char * patternfile = NULL, * subjectfile = NULL;
char c;
int icnt = 1, rc = 0;
bool use_fixed = false;
bool use_old = false;
float time_limit = 0.0;
double m_sub = 0.0;
int debuglevel = 0; // may be later we can use different level...
Expand All @@ -41,7 +41,7 @@ int main(int argc, char ** argv) {
showhelp(argv[0]);
return EXIT_SUCCESS;
case 'f':
use_fixed = true;
use_old = true;
break;
case 'n':
icnt = atoi(optarg);
Expand Down Expand Up @@ -119,18 +119,18 @@ int main(int argc, char ** argv) {

re = new Regex(pattern, debuglevel);
std::list<SMatch> retval;
std::vector<SMatchCapture> captures;

for(int i = 0; i < icnt; i++) {

re->m_retList.clear();

clock_t m_start = clock();
if (use_fixed == false) {
retval = re->searchAll(subject);
if (use_old == false) {
re->searchOneMatch(subject, captures);
}
else {
rc = re->searchAll2(subject, ((debuglevel == 1) ? 10 : 0));
retval = re->m_retList;
retval = re->searchAll(subject);
}
clock_t m_end = clock();
m_sub = (m_end - m_start) / double(CLOCKS_PER_SEC);
Expand All @@ -148,26 +148,51 @@ int main(int argc, char ** argv) {
// show captured substrings if debug was set
if (debuglevel == 1) {
debugvalue(debuglevel, "CAPTURES", "");
retval.reverse();
for(auto s: retval) {
std::string subpatt = "";
if (s.offset() > 0) {
subpatt += subject.substr(0, s.offset());
if (use_old == false) {
for (const SMatchCapture& capture : captures) {
const std::string capture_substring(subject.substr(capture.m_offset, capture.m_length));
std::string subpatt = "";
if (capture.m_offset > 0) {
subpatt += subject.substr(0, capture.m_offset);
}
subpatt += BOLDGREEN + capture_substring + RESET;
if (capture.m_offset + capture_substring.size() < subject.size()) {
subpatt += subject.substr(capture.m_offset + capture_substring.size());
}
std::cout << subpatt << std::endl;
}
subpatt += BOLDGREEN + s.str() + RESET;
if (s.offset() + s.str().size() < subject.size()) {
subpatt += subject.substr(s.offset() + s.str().size());

debugvalue(debuglevel, "OVECTOR", "");
std::cout << "[";
size_t si = 0;
for(auto capture: captures) {
const std::string capture_substring(subject.substr(capture.m_offset, capture.m_length));
std::cout << capture.m_offset << ", " << capture.m_offset + capture_substring.size() << ((si++ < captures.size()-1) ? ", " : "");
}
std::cout << subpatt << std::endl;
std::cout << "]" << std::endl;
}
else {
retval.reverse();
for(auto s: retval) {
std::string subpatt = "";
if (s.offset() > 0) {
subpatt += subject.substr(0, s.offset());
}
subpatt += BOLDGREEN + s.str() + RESET;
if (s.offset() + s.str().size() < subject.size()) {
subpatt += subject.substr(s.offset() + s.str().size());
}
std::cout << subpatt << std::endl;
}

debugvalue(debuglevel, "OVECTOR", "");
std::cout << "[";
size_t si = 0;
for(auto s: retval) {
std::cout << s.offset() << ", " << s.offset() + s.str().size() << ((si++ < retval.size()-1) ? ", " : "");
debugvalue(debuglevel, "OVECTOR", "");
std::cout << "[";
size_t si = 0;
for(auto s: retval) {
std::cout << s.offset() << ", " << s.offset() + s.str().size() << ((si++ < retval.size()-1) ? ", " : "");
}
std::cout << "]" << std::endl;
}
std::cout << "]" << std::endl;
}
// end debug

Expand Down
19 changes: 19 additions & 0 deletions src/regex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,25 @@ Regex::~Regex() {
}
}

bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const {
const char *subject = s.c_str();
int ovector[OVECCOUNT];

int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT);

for (int i = 0; i < rc; i++) {
size_t start = ovector[2*i];
size_t end = ovector[2*i+1];
size_t len = end - start;
if (end > s.size()) {
continue;
}
SMatchCapture capture(i, start, len);
captures.push_back(capture);
}

return (rc > 0);
}

std::list<SMatch> Regex::searchAll(const std::string& s) {
const char *subject = s.c_str();
Expand Down
13 changes: 13 additions & 0 deletions src/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <fstream>
#include <string>
#include <list>
#include <vector>

#include "regexutils.h"

Expand Down Expand Up @@ -36,6 +37,17 @@ class SMatch {
size_t m_offset;
};

// Plain record describing one captured group of a regex match.
struct SMatchCapture {
    size_t m_group;   // group index: 0 is the full match, N is capture group N
    size_t m_offset;  // where the capture starts within the analyzed string
    size_t m_length;  // number of chars the capture spans from m_offset

    // Stores the three values as given; no validation is performed.
    SMatchCapture(size_t group, size_t offset, size_t length)
        : m_group{group}, m_offset{offset}, m_length{length} {}
};

class Regex {
public:
explicit Regex(const std::string& pattern_, int debuglevel);
Expand All @@ -52,6 +64,7 @@ class Regex {
std::list<SMatch> m_retList;

std::list<SMatch> searchAll(const std::string& s);
bool searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const;
int searchAll2(const std::string& s, size_t capturelen);
};

Expand Down

0 comments on commit 1052599

Please sign in to comment.