Skip to content

Commit

Permalink
Allow quantifiers {0} and {0,0}.
Browse files Browse the repository at this point in the history
Regexps with quantifiers `{0}` and `{0,0}` were considered syntactically invalid. These quantifiers are not actually useful, but they are now allowed for compatibility with other regexp engines.

Closes #2037.
  • Loading branch information
plusvic committed Feb 12, 2024
1 parent c1c5d89 commit f3b3027
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 59 deletions.
76 changes: 32 additions & 44 deletions libyara/re_lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1169,20 +1169,14 @@ YY_RULE_SETUP
yyterminate();
}

if (hi_bound == 0 && lo_bound == 0)
{
yyerror(yyscanner, lex_env, "bad repeat interval");
yyterminate();
}

yylval->range = (hi_bound << 16) | lo_bound;

return _RANGE_;
}
YY_BREAK
case 2:
YY_RULE_SETUP
#line 156 "libyara/re_lexer.l"
#line 150 "libyara/re_lexer.l"
{

// Example: {10}
Expand All @@ -1198,20 +1192,14 @@ YY_RULE_SETUP
yyterminate();
}

if (value == 0)
{
yyerror(yyscanner, lex_env, "bad repeat interval");
yyterminate();
}

yylval->range = (value << 16) | value;

return _RANGE_;
}
YY_BREAK
case 3:
YY_RULE_SETUP
#line 183 "libyara/re_lexer.l"
#line 171 "libyara/re_lexer.l"
{

// Start of a negated character class. Example: [^abcd]
Expand All @@ -1223,7 +1211,7 @@ YY_RULE_SETUP
YY_BREAK
case 4:
YY_RULE_SETUP
#line 192 "libyara/re_lexer.l"
#line 180 "libyara/re_lexer.l"
{

// Start of character negated class containing a ].
Expand All @@ -1238,7 +1226,7 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
#line 205 "libyara/re_lexer.l"
#line 193 "libyara/re_lexer.l"
{

// Start of character class containing a ].
Expand All @@ -1253,7 +1241,7 @@ YY_RULE_SETUP
YY_BREAK
case 6:
YY_RULE_SETUP
#line 218 "libyara/re_lexer.l"
#line 206 "libyara/re_lexer.l"
{

// Start of character class. Example: [abcd]
Expand All @@ -1266,7 +1254,7 @@ YY_RULE_SETUP
case 7:
/* rule 7 can match eol */
YY_RULE_SETUP
#line 228 "libyara/re_lexer.l"
#line 216 "libyara/re_lexer.l"
{

// Any non-special character is passed as a CHAR token to the scanner.
Expand All @@ -1277,63 +1265,63 @@ YY_RULE_SETUP
YY_BREAK
case 8:
YY_RULE_SETUP
#line 237 "libyara/re_lexer.l"
#line 225 "libyara/re_lexer.l"
{
return _WORD_CHAR_;
}
YY_BREAK
case 9:
YY_RULE_SETUP
#line 242 "libyara/re_lexer.l"
#line 230 "libyara/re_lexer.l"
{
return _NON_WORD_CHAR_;
}
YY_BREAK
case 10:
YY_RULE_SETUP
#line 247 "libyara/re_lexer.l"
#line 235 "libyara/re_lexer.l"
{
return _SPACE_;
}
YY_BREAK
case 11:
YY_RULE_SETUP
#line 252 "libyara/re_lexer.l"
#line 240 "libyara/re_lexer.l"
{
return _NON_SPACE_;
}
YY_BREAK
case 12:
YY_RULE_SETUP
#line 257 "libyara/re_lexer.l"
#line 245 "libyara/re_lexer.l"
{
return _DIGIT_;
}
YY_BREAK
case 13:
YY_RULE_SETUP
#line 262 "libyara/re_lexer.l"
#line 250 "libyara/re_lexer.l"
{
return _NON_DIGIT_;
}
YY_BREAK
case 14:
YY_RULE_SETUP
#line 267 "libyara/re_lexer.l"
#line 255 "libyara/re_lexer.l"
{
return _WORD_BOUNDARY_;
}
YY_BREAK
case 15:
YY_RULE_SETUP
#line 271 "libyara/re_lexer.l"
#line 259 "libyara/re_lexer.l"
{
return _NON_WORD_BOUNDARY_;
}
YY_BREAK
case 16:
YY_RULE_SETUP
#line 276 "libyara/re_lexer.l"
#line 264 "libyara/re_lexer.l"
{

yyerror(yyscanner, lex_env, "backreferences are not allowed");
Expand All @@ -1342,7 +1330,7 @@ YY_RULE_SETUP
YY_BREAK
case 17:
YY_RULE_SETUP
#line 283 "libyara/re_lexer.l"
#line 271 "libyara/re_lexer.l"
{

uint8_t c;
Expand All @@ -1369,7 +1357,7 @@ YY_RULE_SETUP
YY_BREAK
case 18:
YY_RULE_SETUP
#line 308 "libyara/re_lexer.l"
#line 296 "libyara/re_lexer.l"
{

// End of character class.
Expand All @@ -1385,7 +1373,7 @@ YY_RULE_SETUP
case 19:
/* rule 19 can match eol */
YY_RULE_SETUP
#line 322 "libyara/re_lexer.l"
#line 310 "libyara/re_lexer.l"
{

// A range inside a character class. The regexp is...
Expand Down Expand Up @@ -1450,7 +1438,7 @@ YY_RULE_SETUP
YY_BREAK
case 20:
YY_RULE_SETUP
#line 385 "libyara/re_lexer.l"
#line 373 "libyara/re_lexer.l"
{

for (int i = 0; i < 32; i++)
Expand All @@ -1459,7 +1447,7 @@ YY_RULE_SETUP
YY_BREAK
case 21:
YY_RULE_SETUP
#line 392 "libyara/re_lexer.l"
#line 380 "libyara/re_lexer.l"
{

for (int i = 0; i < 32; i++)
Expand All @@ -1468,7 +1456,7 @@ YY_RULE_SETUP
YY_BREAK
case 22:
YY_RULE_SETUP
#line 399 "libyara/re_lexer.l"
#line 387 "libyara/re_lexer.l"
{

for (int i = 0; i < 32; i++)
Expand All @@ -1477,7 +1465,7 @@ YY_RULE_SETUP
YY_BREAK
case 23:
YY_RULE_SETUP
#line 406 "libyara/re_lexer.l"
#line 394 "libyara/re_lexer.l"
{

for (int i = 0; i < 32; i++)
Expand All @@ -1486,7 +1474,7 @@ YY_RULE_SETUP
YY_BREAK
case 24:
YY_RULE_SETUP
#line 413 "libyara/re_lexer.l"
#line 401 "libyara/re_lexer.l"
{

for (char c = '0'; c <= '9'; c++)
Expand All @@ -1495,7 +1483,7 @@ YY_RULE_SETUP
YY_BREAK
case 25:
YY_RULE_SETUP
#line 420 "libyara/re_lexer.l"
#line 408 "libyara/re_lexer.l"
{

for (int i = 0; i < 32; i++)
Expand All @@ -1515,7 +1503,7 @@ YY_RULE_SETUP
YY_BREAK
case 26:
YY_RULE_SETUP
#line 438 "libyara/re_lexer.l"
#line 426 "libyara/re_lexer.l"
{

uint8_t c;
Expand All @@ -1540,7 +1528,7 @@ YY_RULE_SETUP
YY_BREAK
case 27:
YY_RULE_SETUP
#line 461 "libyara/re_lexer.l"
#line 449 "libyara/re_lexer.l"
{

if (yytext[0] >= 32 && yytext[0] < 127)
Expand All @@ -1558,7 +1546,7 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(char_class):
#line 478 "libyara/re_lexer.l"
#line 466 "libyara/re_lexer.l"
{

// End of regexp reached while scanning a character class.
Expand All @@ -1569,7 +1557,7 @@ case YY_STATE_EOF(char_class):
YY_BREAK
case 28:
YY_RULE_SETUP
#line 487 "libyara/re_lexer.l"
#line 475 "libyara/re_lexer.l"
{

if (yytext[0] >= 32 && yytext[0] < 127)
Expand All @@ -1584,18 +1572,18 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(INITIAL):
#line 501 "libyara/re_lexer.l"
#line 489 "libyara/re_lexer.l"
{

yyterminate();
}
YY_BREAK
case 29:
YY_RULE_SETUP
#line 506 "libyara/re_lexer.l"
#line 494 "libyara/re_lexer.l"
ECHO;
YY_BREAK
#line 1598 "libyara/re_lexer.c"
#line 1586 "libyara/re_lexer.c"

case YY_END_OF_BUFFER:
{
Expand Down Expand Up @@ -2744,7 +2732,7 @@ void yyfree (void * ptr , yyscan_t yyscanner)

#define YYTABLES_NAME "yytables"

#line 506 "libyara/re_lexer.l"
#line 494 "libyara/re_lexer.l"


int escaped_char_value(
Expand Down
12 changes: 0 additions & 12 deletions libyara/re_lexer.l
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,6 @@ hex_digit [0-9a-fA-F]
yyterminate();
}

if (hi_bound == 0 && lo_bound == 0)
{
yyerror(yyscanner, lex_env, "bad repeat interval");
yyterminate();
}

yylval->range = (hi_bound << 16) | lo_bound;

return _RANGE_;
Expand All @@ -168,12 +162,6 @@ hex_digit [0-9a-fA-F]
yyterminate();
}

if (value == 0)
{
yyerror(yyscanner, lex_env, "bad repeat interval");
yyterminate();
}

yylval->range = (value << 16) | value;

return _RANGE_;
Expand Down
5 changes: 2 additions & 3 deletions tests/test-rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -2700,6 +2700,8 @@ void test_re()
assert_false_regexp("(bc+d$|ef*g.|h?i(j|k))", "effg");
assert_false_regexp("(bc+d$|ef*g.|h?i(j|k))", "bcdd");
assert_true_regexp("(bc+d$|ef*g.|h?i(j|k))", "reffgz", "effgz");
assert_true_regexp("abcx{0,0}", "abcx", "abc");
assert_true_regexp("abcx{0}", "abcx", "abc");

// Test case for issue #324
assert_true_regexp("whatever| x. x", " xy x", " xy x");
Expand All @@ -2708,9 +2710,6 @@ void test_re()
assert_regexp_syntax_error("\\x0");
assert_regexp_syntax_error("\\x");

assert_regexp_syntax_error("x{0,0}");
assert_regexp_syntax_error("x{0}");

assert_regexp_syntax_error("\\xxy");

// Test case for issue #682
Expand Down

0 comments on commit f3b3027

Please sign in to comment.