cloudevents · duglin · May 14, 2024 · May 1, 2024 · May 9, 2024 · May 9, 2024
@@ -6,17 +6,14 @@
 package expression
 
 import (
-	"regexp"
-	"strings"
-
 	cesql "github.com/cloudevents/sdk-go/sql/v2"
 	"github.com/cloudevents/sdk-go/sql/v2/utils"
 	cloudevents "github.com/cloudevents/sdk-go/v2"
 )
 
 type likeExpression struct {
 	baseUnaryExpression
-	pattern *regexp.Regexp
+	pattern string
 }
 
 func (l likeExpression) Evaluate(event cloudevents.Event) (interface{}, error) {
@@ -30,70 +27,65 @@ func (l likeExpression) Evaluate(event cloudevents.Event) (interface{}, error) {
 		return nil, err
 	}
 
-	return l.pattern.MatchString(val.(string)), nil
+	return matchString(val.(string), l.pattern), nil
+
 }
 
 func NewLikeExpression(child cesql.Expression, pattern string) (cesql.Expression, error) {
-	// Converting to regex is not the most performant impl, but it works
-	p, err := convertLikePatternToRegex(pattern)
-	if err != nil {
-		return nil, err
-	}
-
 	return likeExpression{
 		baseUnaryExpression: baseUnaryExpression{
 			child: child,
 		},
-		pattern: p,
+		pattern: pattern,
 	}, nil
 }
 
// matchString reports whether text, taken in its entirety, matches the SQL
// LIKE pattern.
//
// Pattern syntax:
//   - '%' matches any run of zero or more characters;
//   - '_' matches exactly one character;
//   - `\%` and `\_` match a literal '%' and a literal '_';
//   - every other character, including a '\' that is not followed by '%' or
//     '_', matches only itself.
//
// Matching is done on runes rather than bytes so that '_' consumes one whole
// character even when it is encoded as several UTF-8 bytes. Each byte of
// invalid UTF-8 is compared as U+FFFD.
func matchString(text, pattern string) bool {
	txt := []rune(text)
	pat := []rune(pattern)

	ti, pi := 0, 0
	// resumePi is the pattern index just past the most recent '%', or -1 when
	// no '%' has been seen. resumeTi is the text index at which that '%'
	// currently stops; it grows by one on every backtrack.
	resumePi, resumeTi := -1, 0

	for ti < len(txt) {
		if pi < len(pat) {
			switch c := pat[pi]; {
			case c == '\\' && pi+1 < len(pat) && (pat[pi+1] == '%' || pat[pi+1] == '_'):
				// Escaped wildcard: only the literal character may match. On a
				// mismatch we must NOT fall back to treating the backslash as
				// a literal, otherwise the following '%'/'_' would be read as
				// a wildcard.
				if txt[ti] == pat[pi+1] {
					pi += 2
					ti++
					continue
				}
			case c == '%':
				// Checked before the literal comparison so that a '%' in the
				// text cannot swallow the wildcard as an ordinary character.
				resumePi = pi + 1
				resumeTi = ti
				pi++
				continue
			case c == '_' || c == txt[ti]:
				pi++
				ti++
				continue
			}
		}

		// Mismatch or pattern exhausted: let the last '%' absorb one more
		// character and retry, or fail if there is no '%' to fall back on.
		if resumePi == -1 {
			return false
		}
		resumeTi++
		ti = resumeTi
		pi = resumePi
	}

	// Text is exhausted; whatever is left of the pattern may only be '%'.
	for pi < len(pat) && pat[pi] == '%' {
		pi++
	}
	return pi == len(pat)
}