diff --git a/cmd/regex_format.go b/cmd/regex_format.go index bc69620..3de7b99 100644 --- a/cmd/regex_format.go +++ b/cmd/regex_format.go @@ -12,6 +12,7 @@ import ( "os" "path" "path/filepath" + "regexp" "strings" "github.com/spf13/cobra" @@ -20,6 +21,7 @@ import ( "github.com/coreruleset/crs-toolchain/v2/regex" "github.com/coreruleset/crs-toolchain/v2/regex/parser" "github.com/coreruleset/crs-toolchain/v2/regex/processors" + "github.com/coreruleset/crs-toolchain/v2/utils" ) const ( @@ -191,7 +193,7 @@ func processFile(filePath string, ctxt *processors.Context, checkOnly bool) erro } if !checkStandardHeader(lines) { - logger.Info().Msgf("file %s does not have standard header", filePath) + logger.Info().Msgf("file %s does not have standard header", filename) // prepend the standard header lines = append([]string{regexAssemblyStandardHeader}, lines...) } @@ -205,15 +207,15 @@ func processFile(filePath string, ctxt *processors.Context, checkOnly bool) erro return err } // sanity check: if we are using an ignore-case flag, we don't need to have any uppercase letters in the file - foundUppercase, errMessage := findUpperCaseOnIgnoreCaseFlag(lines, raParser.Flags['i']) + foundUppercase, errMessage := findUpperCaseCharacterClassOnIgnoreCaseFlag(lines, raParser.Flags['i']) if foundUppercase { - logger.Warn().Msgf("File contains uppercase letters, but ignore-case flag is set. Please check your source files.") + logger.Warn().Msgf("%s contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.", filename) logger.Warn().Msgf("%s", errMessage) logger.Warn().Msg("Be aware that because of file inclusions and definitions, the actual line number or file might be different.") } equalContent := bytes.Equal(currentContents, newContents) if !equalContent || foundUppercase { - message = formatMessage(fmt.Sprintf("File %s not properly formatted", filePath)) + message = formatMessage(fmt.Sprintf("%s not properly formatted", filename)) fmt.Println(message) processFileError = &UnformattedFileError{filePath: filePath} } @@ -318,9 +320,9 @@ func checkStandardHeader(lines []string) bool { return false } -// findUpperCaseOnIgnoreCaseFlag checks if the file contains uppercase letters when the ignore-case flag is set +// findUpperCaseCharacterClassOnIgnoreCaseFlag checks if the file contains uppercase letters when the ignore-case flag is set // returns true if the file contains uppercase letters, and an error message pointing the line where it was found. -func findUpperCaseOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) { +func findUpperCaseCharacterClassOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) { res := false definition := false message := "" @@ -344,7 +346,7 @@ func findUpperCaseOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) { if definition { // restore the original line line = lines[i] - index = definitionRegex.FindSubmatchIndex([]byte(line))[3] + index += definitionRegex.FindSubmatchIndex([]byte(line))[3] } if index > 0 { fill = strings.Repeat("=", index) @@ -361,24 +363,28 @@ func findUpperCaseOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) { // findUppercaseNonEscaped finds an uppercase character that is not escaped in a given line // returns true if the line contains an uppercase character that is not escaped, and the index of the character func findUppercaseNonEscaped(line string) (bool, int) { - for i, c := range line { - if c >= 'A' && c <= 'Z' { - // go back and check if the character is escaped - count := 0 - runes := []rune(line[:i]) // Convert string to rune slice - // Iterate over the rune slice in reverse order - for j := len(runes) - 1; j >= 0; j-- { - if runes[j] == '\\' { - // we found a backslash, so we need to check if it is escaped - count++ - // if the character is not escaped, return the index - } else { - break + characterClassRegex := regexp.MustCompile(`(?:^|[^\\])\[.+?[^\\]\]`) + matches := characterClassRegex.FindAllStringIndex(line, -1) + if matches == nil { + return false, -1 + } + + for _, location := range matches { + start := location[0] + end := location[1] + // Skip match of non-backslash character before the first bracket. + // Don't skip if the match is at the start of the string. + if start > 0 { + start += 1 + } + match := line[start:end] + for i, c := range match { + if c >= 'A' && c <= 'Z' { + // Escaped upper case letters are probably shortahands like `\W`, `\S` + if !utils.IsEscaped(match, i) { + return true, i + start } } - if count%2 == 0 { - return true, i - } } } return false, -1 diff --git a/cmd/regex_format_test.go b/cmd/regex_format_test.go index de36947..4bf8667 100644 --- a/cmd/regex_format_test.go +++ b/cmd/regex_format_test.go @@ -508,12 +508,11 @@ this is a regex _, err := rootCmd.ExecuteC() s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra"))) - - s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.") + s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.") s.Contains(out.String(), "{1,3}[Bb]lah\\n======^ [HERE]\\n\"}\n") } -func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercasePositionIndicator() { +func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_FirstCharacter() { // send logs to buffer out := &bytes.Buffer{} log := zerolog.New(out) @@ -521,88 +520,120 @@ func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercasePositionIndicator() { s.writeDataFile("123456.ra", regexAssemblyStandardHeader+` ##!+ i -First letter is uppercase +[First] letter is uppercase `) rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"}) _, err := rootCmd.ExecuteC() s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra"))) + s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.") + s.Contains(out.String(), "[First] letter is uppercase\\n=^ [HERE]\\n\"}\n") +} + +func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_LastCharacter() { + // send logs to buffer + out := &bytes.Buffer{} + log := zerolog.New(out) + logger = log.With().Str("component", "parser-test").Logger() - s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.") - s.Contains(out.String(), "First letter is uppercase\\n^ [HERE]\\n\"}\n") + s.writeDataFile("123456.ra", regexAssemblyStandardHeader+` +##!+ i +Last letter is upper[casE] +`) + rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"}) + + _, err := rootCmd.ExecuteC() + s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra"))) + s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.") + s.Contains(out.String(), "Last letter is upper[casE]\\n========================^ [HERE]\\n\"}\n") } -func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercaseEscapedUppercase() { +func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitions() { // send logs to buffer out := &bytes.Buffer{} log := zerolog.New(out) logger = log.With().Str("component", "parser-test").Logger() s.writeDataFile("123456.ra", regexAssemblyStandardHeader+` +##!> define homer [simpson] ##!+ i -this regex has a \S letter which should not match -multiple escape sequences \S\S\S +multiple escape sequences \A\B\S should be good. `) rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"}) _, err := rootCmd.ExecuteC() - s.NoError(err) + s.Require().NoError(err) } -func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercaseUnevenlyEscapedUppercase() { +func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitionsWithUppercase() { // send logs to buffer out := &bytes.Buffer{} log := zerolog.New(out) logger = log.With().Str("component", "parser-test").Logger() s.writeDataFile("123456.ra", regexAssemblyStandardHeader+` +##!> define homer No_[Bueno] ##!+ i multiple escape sequences \A\B\S should be good. -odd number of escape sequences should be good also \\\A\\\S -even number of escape sequences should be bad \\\\A\\B `) rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"}) _, err := rootCmd.ExecuteC() s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra"))) - s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.") - s.Contains(out.String(), "even number of escape sequences should be bad") + s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.") + s.Contains(out.String(), "##!> define homer No_[Bueno]\\n======================^ [HERE]") } -func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitions() { +func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_ComplexCharacterClass() { // send logs to buffer out := &bytes.Buffer{} log := zerolog.New(out) logger = log.With().Str("component", "parser-test").Logger() s.writeDataFile("123456.ra", regexAssemblyStandardHeader+` -##!> define homer simpson ##!+ i -multiple escape sequences \A\B\S should be good. +I'm complex: [^S$%_+-fG-]. `) rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"}) _, err := rootCmd.ExecuteC() - s.Require().NoError(err) + s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra"))) + s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.") + s.Contains(out.String(), "I'm complex: [^S$%_+-fG-].\\n===============^ [HERE]") } -func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitionsWithUppercase() { +func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_CharacterClassWithBrackets() { // send logs to buffer out := &bytes.Buffer{} log := zerolog.New(out) logger = log.With().Str("component", "parser-test").Logger() s.writeDataFile("123456.ra", regexAssemblyStandardHeader+` -##!> define homer No_Bueno ##!+ i -multiple escape sequences \A\B\S should be good. +Chara[ct]er class with brackets: [$l\][2R [fl\]iF] `) rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"}) _, err := rootCmd.ExecuteC() s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra"))) - s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.") - s.Contains(out.String(), "##!> define homer No_Bueno\\n==================^ [HERE]") + s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.") + s.Contains(out.String(), `Chara[ct]er class with brackets: [$l\\][2R [fl\\]iF]\n========================================^ [HERE]`) +} + +func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_IgnoreShortHands() { + // send logs to buffer + out := &bytes.Buffer{} + log := zerolog.New(out) + logger = log.With().Str("component", "parser-test").Logger() + + s.writeDataFile("123456.ra", regexAssemblyStandardHeader+` +##!+ i +[\W\S] +`) + rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"}) + + _, err := rootCmd.ExecuteC() + s.Require().NoError(err) } func (s *formatTestSuite) writeDataFile(filename string, contents string) { diff --git a/regex/operators/assembler.go b/regex/operators/assembler.go index 8f4c137..577b26f 100644 --- a/regex/operators/assembler.go +++ b/regex/operators/assembler.go @@ -17,6 +17,7 @@ import ( "github.com/coreruleset/crs-toolchain/v2/regex" "github.com/coreruleset/crs-toolchain/v2/regex/parser" "github.com/coreruleset/crs-toolchain/v2/regex/processors" + "github.com/coreruleset/crs-toolchain/v2/utils" ) // Create the processor stack @@ -326,11 +327,11 @@ func (o *Operator) findGroupBodyEnd(input string, groupBodyStart int) (int, bool char := input[index] switch char { case '(': - if !regex.IsEscaped(input, index) { + if !utils.IsEscaped(input, index) { parensCounter++ } case ')': - if !regex.IsEscaped(input, index) { + if !utils.IsEscaped(input, index) { parensCounter-- } case '|': diff --git a/utils/utils.go b/utils/utils.go new file mode 100644 index 0000000..0bfe58a --- /dev/null +++ b/utils/utils.go @@ -0,0 +1,15 @@ +// Copyright 2024 OWASP ModSecurity Core Rule Set Project +// SPDX-License-Identifier: Apache-2.0 + +package utils + +func IsEscaped(input string, position int) bool { + escapeCounter := 0 + for backtrackIndex := position - 1; backtrackIndex >= 0; backtrackIndex-- { + if input[backtrackIndex] != '\\' { + break + } + escapeCounter++ + } + return escapeCounter%2 != 0 +} diff --git a/utils/utils_test.go b/utils/utils_test.go new file mode 100644 index 0000000..2e4f125 --- /dev/null +++ b/utils/utils_test.go @@ -0,0 +1,42 @@ +// Copyright 2024 OWASP ModSecurity Core Rule Set Project +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "testing" + + "github.com/stretchr/testify/suite" +) + +type utilsTestSuite struct { + suite.Suite +} + +func TestRunUtilsTestSuite(t *testing.T) { + suite.Run(t, new(utilsTestSuite)) +} + +func (s *utilsTestSuite) TestIsEscaped() { + s.True(IsEscaped(`abc\(de`, 4)) + s.True(IsEscaped(`\(abc`, 1)) + s.True(IsEscaped(`abc\(`, 4)) +} + +func (s *utilsTestSuite) TestIsEscaped_Backslashes() { + s.True(IsEscaped(`abc\\de`, 4)) + s.True(IsEscaped(`\\abc`, 1)) + s.True(IsEscaped(`abc\\`, 4)) +} + +func (s *utilsTestSuite) TestIsEscaped_Not() { + s.False(IsEscaped(`abc\\(de`, 5)) + s.False(IsEscaped(`\\(abc`, 2)) + s.False(IsEscaped(`abc\\(`, 5)) +} + +func (s *utilsTestSuite) TestIsEscaped_Not_Backslashes() { + s.False(IsEscaped(`abc\\\de`, 5)) + s.False(IsEscaped(`\\\abc`, 2)) + s.False(IsEscaped(`abc\\\`, 5)) +}