Skip to content

Commit

Permalink
Merge pull request #111 from coreruleset/go-1.22-update
Browse files Browse the repository at this point in the history
feat: update to Go 1.22
  • Loading branch information
theseion authored Feb 18, 2024
2 parents bbdbed1 + 032c9e0 commit 46fba6a
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ jobs:
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: v1.21.x
go-version: v1.22.x
cache: true
- run: go run mage.go lint
2 changes: 1 addition & 1 deletion .github/workflows/regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
test:
strategy:
matrix:
go-version: [1.21.x]
go-version: [1.22.x]
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
name: Set up Go
uses: actions/setup-go@v5
with:
go-version: ^1.19
go-version: ^1.22
cache: true
-
name: Login to GitHub Container Registry
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/coreruleset/crs-toolchain

go 1.21
go 1.22

require (
dario.cat/mergo v1.0.0
Expand Down
58 changes: 37 additions & 21 deletions regex/operators/assembler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"bytes"
"errors"
"fmt"
"regexp"
"sort"
"strings"

Expand All @@ -18,12 +19,6 @@ import (
"github.com/coreruleset/crs-toolchain/regex/processors"
)

var metaGroupReplacements = map[string]string{
"(?-s:.)": ".",
"(?m:^)": "^",
"(?m:$)": "$",
}

// Create the processor stack
var processorStack ProcessorStack
var processor processors.IProcessor
Expand Down Expand Up @@ -136,11 +131,11 @@ func (a *Operator) complete(assembleParser *parser.Parser) string {
result = a.runSimplificationAssembly(result)
logger.Trace().Msgf("After simplification assembly: %s\n", result)
result = a.useHexEscapes(result)
logger.Trace().Msgf("After simplification assembly: %s\n", result)
logger.Trace().Msgf("After replacing non-printable characters with hex escapes: %s\n", result)
result = a.escapeDoublequotes(result)
logger.Trace().Msgf("After escaping double quotes: %s\n", result)
result = a.useHexBackslashes(result)
logger.Trace().Msgf("After use hex backslashes: %s\n", result)
logger.Trace().Msgf("After replacing plain backslashes with hex escapse: %s\n", result)
result = a.includeVerticalTabInSpaceClass(result)
logger.Trace().Msgf("After including vertical tabs: %s\n", result)
result = a.dontUseFlagsForMetaCharacters(result)
Expand Down Expand Up @@ -217,12 +212,7 @@ func (a *Operator) useHexBackslashes(input string) string {
// compatible engines.
func (a *Operator) includeVerticalTabInSpaceClass(input string) string {
logger.Trace().Msg("Fixing up regex to include \\v in white space class matches")
// Note: replacement order is important. Don't use a map.
// A complete positive class `[\t-\n\f-\r ]` is rewritten to `[\s\v]`.
result := strings.ReplaceAll(input, `[\t-\n\f-\r ]`, `[\s\v]`)
// A complete negated class is rewritten the same way, keeping the `^`.
result = strings.ReplaceAll(result, `[^\t-\n\f-\r ]`, `[^\s\v]`)
// There's a range attached, can't just replace
result = strings.ReplaceAll(result, `\t-\n\f-\r -`, `\s\v -`)
// Any occurrence of the sequence that is still left is replaced on its own.
return strings.ReplaceAll(result, `\t-\n\f-\r `, `\s\v`)
// NOTE(review): as listed, the statement below follows a return and is
// unreachable. It matches `\t\n\f\r ` (no ranges) on the unmodified input and
// looks like the replacement for the chain above in this change; confirm
// against the actual file.
return strings.ReplaceAll(input, `\t\n\f\r `, `\s\v`)
}

// rassemble-go doesn't provide an option to specify literals.
Expand Down Expand Up @@ -256,19 +246,45 @@ func (a *Operator) useHexEscapes(input string) string {
return sb.String()
}

// The Go regexp/syntax library will convert:
// - a dot (`.`) into `(?-s:.)`
// - a caret (`^`) into `(?m:^)`
// - a dollar (`$`) into (?m:$)`
// We want to retain the original dot.
// The Go regexp/syntax library will insert flags when it encounters
// meta characters that could be ambiguous, such as `^`, `$`, `.`.
// Remove both flags for the current context, e.g., `...(?m)...`, and flag groups
// applied to subexpressions, e.g., `...(?m:...)...`
func (a *Operator) dontUseFlagsForMetaCharacters(input string) string {
result := input
// NOTE(review): the range loop opened on the next line is never closed in
// this listing. These two lines look like the earlier map-based
// implementation that the regex-based code below supersedes; confirm
// against the actual file.
for needle, replacement := range metaGroupReplacements {
result = strings.ReplaceAll(result, needle, replacement)
// Delete every standalone flag setting, i.e. `(?` followed by one or more of
// the characters `-`, `m`, `i`, `s`, `U` and a closing `)`.
flagsStartRegexp := regexp.MustCompile(`\(\?[-misU]+\)`)
result = flagsStartRegexp.ReplaceAllString(result, "")

// Matches only the opener of a flag group (up to and including the `:`); the
// matching closing parenthesis is located by replaceFlagGroup.
flagGroupStartRegexp := regexp.MustCompile(`\(\?[-misU]+:`)
// Handle one flag group per iteration, always searching the updated string
// from the start, until no opener is found.
for {
location := flagGroupStartRegexp.FindStringIndex(result)
if len(location) > 0 {
result = replaceFlagGroup(result, location)
} else {
break
}
}
return result
}

// replaceFlagGroup removes a single flag group wrapper such as `(?-s:` ... `)`
// from input while keeping the body of the group.
//
// location is the match of the group opener in the form returned by
// regexp.FindStringIndex: location[0] is the index of the opening parenthesis
// and location[1] is the index of the first byte of the group body.
//
// The closing parenthesis is found by counting nested parentheses. A byte that
// follows a backslash is skipped, so escaped parentheses (`\(`, `\)`) are
// treated as literals and do not affect the count. If the group has no
// matching closing parenthesis, only the opener is removed; this avoids
// indexing past the end of input and guarantees that a caller looping over
// openers always makes progress.
func replaceFlagGroup(input string, location []int) string {
	groupStart, bodyStart := location[0], location[1]

	depth := 1
	for index := bodyStart; index < len(input); index++ {
		switch input[index] {
		case '\\':
			// Skip the escaped byte; it can neither open nor close a group.
			index++
		case '(':
			depth++
		case ')':
			depth--
			if depth == 0 {
				// Drop the opener and this closing parenthesis, keep the body.
				return input[:groupStart] + input[bodyStart:index] + input[index+1:]
			}
		}
	}

	// Unbalanced group: strip the opener only.
	return input[:groupStart] + input[bodyStart:]
}

func (a *Operator) startPreprocessor(processorName string, args []string) error {
logger.Trace().Msgf("Found processor %s start\n", processorName)
switch processorName {
Expand Down
24 changes: 12 additions & 12 deletions regex/operators/assembler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ b`
assembler := NewAssembler(s.ctx)
output, err := assembler.Run(contents)
s.Require().NoError(err)
s.Equal("a prefix[a-b]", output)
s.Equal("a prefix[ab]", output)
}

func (s *specialCommentsTestSuite) TestHandlesSuffixComment() {
Expand All @@ -281,7 +281,7 @@ b`
assembler := NewAssembler(s.ctx)
output, err := assembler.Run(contents)
s.Require().NoError(err)
s.Equal("[a-b]a suffix", output)
s.Equal("[ab]a suffix", output)
}

func (s *specialCasesTestSuite) TestIgnoresEmptyLines() {
Expand Down Expand Up @@ -327,7 +327,7 @@ b\x5c\x48
assembler := NewAssembler(s.ctx)
output, err := assembler.Run(contents)
s.Require().NoError(err)
s.Equal(`[a-b]\x5cH`, output)
s.Equal(`[ab]\x5cH`, output)
}

func (s *specialCasesTestSuite) TestSpecialComments_HandlesEscapedAlternationsCorrectly() {
Expand Down Expand Up @@ -568,7 +568,7 @@ d

output, err := assembler.Run(contents)
s.Require().NoError(err)
s.Equal(`[^0-9A-Z_a-z]*\(two(?:a+b|[c-d])`, output)
s.Equal(`[^0-9A-Z_a-z]*\(two(?:a+b|[cd])`, output)

}

Expand All @@ -582,7 +582,7 @@ d

output, err := assembler.Run(contents)
s.Require().NoError(err)
s.Equal(`(?:a+b|[c-d])[^0-9A-Z_a-z]*\(two`, output)
s.Equal(`(?:a+b|[cd])[^0-9A-Z_a-z]*\(two`, output)

}
func (s *assemblerTestSuite) TestAssemble_Assembling_3() {
Expand Down Expand Up @@ -717,7 +717,7 @@ func (s *assemblerTestSuite) TestAssemble_ConcatenatingWithStoredInput() {
output, err := assembler.Run(contents)
s.Require().NoError(err)

s.Equal(`(?:\x5c|%(?:2f|5c))\.(?:%0[0-1])?(?:\x5c|%(?:2f|5c))`, output)
s.Equal(`(?:\x5c|%(?:2f|5c))\.(?:%0[01])?(?:\x5c|%(?:2f|5c))`, output)

}

Expand Down Expand Up @@ -807,7 +807,7 @@ d
output, err := assembler.Run(contents)
s.Require().NoError(err)

s.Equal(`[a-b][c-d]`, output)
s.Equal(`[ab][cd]`, output)

}
func (s *assemblerTestSuite) TestAssemble_ConcatenationWithPrefixAndSuffix() {
Expand All @@ -825,7 +825,7 @@ b
output, err := assembler.Run(contents)
s.Require().NoError(err)

s.Equal(`prefix[a-b]suffix`, output)
s.Equal(`prefix[ab]suffix`, output)

}
func (s *assemblerTestSuite) TestAssemble_AssembleWrappedInGroupWithTailConcatenation() {
Expand All @@ -845,7 +845,7 @@ more
output, err := assembler.Run(contents)
s.Require().NoError(err)

s.Equal(`[a-b][c-d]more`, output)
s.Equal(`[ab][cd]more`, output)

}
func (s *assemblerTestSuite) TestAssemble_AssembleWrappedInGroupWithTailAlternation() {
Expand All @@ -863,7 +863,7 @@ more
output, err := assembler.Run(contents)
s.Require().NoError(err)

s.Equal(`[a-b][c-d]|more`, output)
s.Equal(`[ab][cd]|more`, output)

}
func (s *assemblerTestSuite) TestAssemble_NestedGroups() {
Expand All @@ -885,7 +885,7 @@ func (s *assemblerTestSuite) TestAssemble_RemoveExtraGroups() {
output, err := assembler.Run(contents)
s.Require().NoError(err)

s.Equal(`a[b-c]d`, output)
s.Equal(`a[bc]d`, output)
}

// The Go regexp/syntax library will convert a dot (`.`) into `(?-s:.)`.
Expand Down Expand Up @@ -913,7 +913,7 @@ func (s *assemblerTestSuite) TestAssemble_DotRemainsDotWithSflag() {
}

// The Go regexp/syntax library will convert a caret (`^`) into `(?m:^)`.
// We want to retain the original dot.
// We want to retain the original without the flag.
func (s *assemblerTestSuite) TestAssemble_CaretRemainsCaret() {
contents := "^a|b"
assembler := NewAssembler(s.ctx)
Expand Down
2 changes: 1 addition & 1 deletion regex/processors/assemble_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func (s *assembleTestSuite) TestAssemble_RegularExpressions() {

s.Require().NoError(err)
s.Len(output, 1)
s.Equal("(?:(?:home[,r]|(?-s:.)imps[a-c]{2}n))", output[0])
s.Equal("(?:(?:(?-s:home[,r]|.imps[a-c]{2}n)))", output[0])
}

func (s *assembleTestSuite) TestAssemble_InvalidRegularExpressionFails() {
Expand Down

0 comments on commit 46fba6a

Please sign in to comment.