From 941daffe51dd5f29890310ac32d942b38c2c5636 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Thu, 27 Apr 2023 18:30:33 +0200 Subject: [PATCH] WIP: Enhance filter parser --- internal/filter/contracts.go | 5 + internal/filter/parser.peg | 191 +++++++++++++++++++++++++++++++++ internal/filter/parser_test.go | 117 ++++++++++---------- internal/filter/types.go | 110 ++++++++++++++++--- 4 files changed, 348 insertions(+), 75 deletions(-) create mode 100644 internal/filter/parser.peg diff --git a/internal/filter/contracts.go b/internal/filter/contracts.go index 7c8691318..acc79fd65 100644 --- a/internal/filter/contracts.go +++ b/internal/filter/contracts.go @@ -13,3 +13,8 @@ type Filterable interface { type Filter interface { Eval(filterable Filterable) bool } + +type Chainable interface { + Add(rule ...Filter) + Rules() []Filter +} diff --git a/internal/filter/parser.peg b/internal/filter/parser.peg new file mode 100644 index 000000000..31c15cd7a --- /dev/null +++ b/internal/filter/parser.peg @@ -0,0 +1,191 @@ +{ +package filter + +func ParseFilter(expr string, opts ...Option) (Filter, error) { + filter, err := Parse("", []byte(expr), opts...) 
+ if err != nil { + parserErr := err.(errList)[0].(*parserError) + return nil, fmt.Errorf("invalid filter '%s', %s", expr, parserErr.Inner) + } + + return filter.(Filter), nil +} +} + +Rule <- not:BinaryNot group:LogicalFilterGroup op:LogicalOperator group2:LogicalFilterGroup { + chain, err := NewChain(group2.(Filter), op.(string)) + if err != nil { + return nil, err + } + + none, err := NewChain(group.(Filter), not.(string)) + if err != nil { + return nil, err + } + + chain.Add(none.(Filter)) + + return chain, nil + } + / not:BinaryNot group:LogicalFilterGroup { + return NewChain(group.(Filter), not.(string)) + } + / group:LogicalFilterGroup op:LogicalOperator not:BinaryNot group2:LogicalFilterGroup { + none, err := NewChain(group2.(Filter), not.(string)) + if err != nil { + return nil, err + } + + chain, err := NewChain(group.(Filter), op.(string)) + if err != nil { + return nil, err + } + + chain.Add(none.(Filter)) + + return chain, nil + } + / group:LogicalFilterGroup & EOF { + return group, nil + } +LogicalFilterGroup <- open:OpenBrace chain:FilterChain clo:ClosingBrace { + return chain, nil + } + / chain:FilterChain { + return chain, nil + } +FilterChain <- chain:LogicalConditionExpr { + return chain, nil + } + / cond:Condition rules:LogicalConditionExpr+ { + filters := rules.([]interface{}) + if len(filters) == 0 { + return cond, nil + } + + var rule Chainable + chains := make(map[string]Chainable, 1) + for i := len(filters)-1; i >= 0; i-- { + chain := filters[i].(Chainable) + if i == 0 { + chain.Add(cond.(Filter)) + } + + if i == len(filters)-1 { + rule = chain + continue + } + + // We don't need a nested filter chain of the same type! + if reflect.TypeOf(chain) == reflect.TypeOf(rule) { + rule.Add(chain.Rules()...) + continue + } + + lastRule, ok := chains[reflect.TypeOf(chain).String()] + if !ok { + chains[reflect.TypeOf(chain).String()] = chain + } else { + lastRule.Add(chain.Rules()...) 
+ } + } + + for _, chain := range chains { + rule.Add(chain.(Filter)) + } + + return rule, nil + } + / cond:Condition { + return cond, nil + } +LogicalConditionExpr <- op:LogicalOperator not:BinaryNot cond:Condition { + chain, err := NewChain(cond.(Filter), not.(string)) + if err != nil { + return nil, err + } + + return NewChain(chain.(Filter), op.(string)) + } + / not:BinaryNot cond:Condition { + return NewChain(cond.(Filter), not.(string)) + } + / op:LogicalOperator cond:Condition { + return NewChain(cond.(Filter), op.(string)) + } +Condition <- col:Identifier op:Operator val:Identifier { + column, err := url.QueryUnescape(col.(string)) + if err != nil { + return nil, err + } + + value, err := url.QueryUnescape(val.(string)) + if err != nil { + return nil, err + } + + return NewCondition(column, op.(string), value) + } + / expr:ExistsExpr { + return expr, nil + } +ExistsExpr <- col:Identifier &LogicalOperator { + return NewExists(col.(string)) + } + / col:Identifier &EOF { + return NewExists(col.(string)) + } +Operator <- ( "<=" / ">=" / "!=" / "=" / "<"/ ">" ) { + c.globalStore["lastMatch"] = "op" + return string(c.text), nil + } +OpenBrace <- open:"(" { + val, ok := c.globalStore["braces"] + if !ok { + c.globalStore["braces"] = 1 + } else { + c.globalStore["braces"] = val.(int) + 1 + } + + return string(c.text), nil + } +ClosingBrace <- clos:")" { + val, ok := c.globalStore["braces"] + if !ok { + c.globalStore["braces"] = -1 + } else { + c.globalStore["braces"] = val.(int) - 1 + } + + return string(c.text), nil +} +BinaryNot <- not:"!" { + c.globalStore["lastMatch"] = "logicalOp" + return string(c.text), nil +} +LogicalOperator <- ( "&" / "|" ) { + c.globalStore["lastMatch"] = "logicalOp" + return string(c.text), nil +} +Identifier "column or value" <- [a-zA-Z0-9_%*]+ { + c.globalStore["lastMatch"] = "identifier" + return string(c.text), nil + } + / ! 
{ + val, ok := c.globalStore["lastMatch"] + if ok && (val == "op" || val == "logicalOp") { + panic(fmt.Sprintf("unexpected '%s' at pos %d", string(c.text), c.pos.col)) + } + + braces, ok := c.globalStore["braces"] + if ok && braces.(int) > 0 { + return false, errors.New("missing closing parenthesis ')'") + } + + if ok && braces.(int) < 0 { + return false, errors.New("missing opening parenthesis '('") + } + + return false, nil + } +EOF <- !. diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index ebbba4c75..61b822ccf 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -10,49 +10,43 @@ func TestParser(t *testing.T) { t.Parallel() t.Run("MissingLogicalOperatorsAfterConditionsAreDetected", func(t *testing.T) { - _, err := Parse("(a=b|c=d)e=f") + _, err := ParseFilter("(a=b|c=d)e=f") - expected := "invalid filter '(a=b|c=d)e=f', unexpected e at pos 10: Expected logical operator" + expected := "invalid filter '(a=b|c=d)e=f', no match found, expected: \"&\", \"|\" or EOF" assert.EqualError(t, err, expected, "Errors should be the same") }) t.Run("MissingLogicalOperatorsAfterOperatorsAreDetected", func(t *testing.T) { - _, err := Parse("(a=b|c=d|)e=f") + _, err := ParseFilter("(a=b|c=d|)e=f") - expected := "invalid filter '(a=b|c=d|)e=f', unexpected e at pos 11: Expected logical operator" + expected := "invalid filter '(a=b|c=d|)e=f', unexpected '|' at pos 9" assert.EqualError(t, err, expected, "Errors should be the same") }) t.Run("ParserIdentifiesInvalidExpression", func(t *testing.T) { - _, err := Parse("col=(") - assert.EqualError(t, err, "invalid filter 'col=(', unexpected ( at pos 5", "Errors should be the same") + _, err := ParseFilter("col=(") + assert.EqualError(t, err, "invalid filter 'col=(', unexpected '=' at pos 4", "Errors should be the same") - _, err = Parse("(((x=a)&y=b") - assert.EqualError(t, err, "invalid filter '(((x=a)&y=b', missing 2 closing ')' at pos 11", "Errors should be the same") + _, 
err = ParseFilter("(((x=a)&y=b") + assert.EqualError(t, err, "invalid filter '(((x=a)&y=b', missing closing parenthesis ')'", "Errors should be the same") - _, err = Parse("(x=a)&y=b)") + _, err = ParseFilter("(x=a)&y=b)") assert.EqualError(t, err, "invalid filter '(x=a)&y=b)', unexpected ) at pos 10", "Errors should be the same") - _, err = Parse("!(&") - assert.EqualError(t, err, "invalid filter '!(&', unexpected & at pos 3", "Errors should be the same") + _, err = ParseFilter("!(&") + assert.EqualError(t, err, "invalid filter '!(&', unexpected '&' at pos 3", "Errors should be the same") - _, err = Parse("!(!&") - assert.EqualError(t, err, "invalid filter '!(!&', unexpected & at pos 4: operator level 1", "Errors should be the same") + _, err = ParseFilter("foo&bar=(te(st)") + assert.EqualError(t, err, "invalid filter 'foo&bar=(te(st)', unexpected '=' at pos 8", "Errors should be the same") - _, err = Parse("!(|test") - assert.EqualError(t, err, "invalid filter '!(|test', unexpected | at pos 3", "Errors should be the same") + _, err = ParseFilter("foo&bar=te(st)") + assert.EqualError(t, err, "invalid filter 'foo&bar=te(st)', no match found, expected: \"!\", \"&\", \"|\", [a-zA-Z0-9_%*] or EOF", "Errors should be the same") - _, err = Parse("foo&bar=(te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=(te(st)', unexpected ( at pos 9", "Errors should be the same") - - _, err = Parse("foo&bar=te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=te(st)', unexpected ( at pos 11", "Errors should be the same") - - _, err = Parse("foo&bar=test)") + _, err = ParseFilter("foo&bar=test)") assert.EqualError(t, err, "invalid filter 'foo&bar=test)', unexpected ) at pos 13", "Errors should be the same") - _, err = Parse("!()|&()&)") - assert.EqualError(t, err, "invalid filter '!()|&()&)', unexpected closing ')' at pos 9", "Errors should be the same") + _, err = ParseFilter("!()|&()&)") + assert.EqualError(t, err, "invalid filter '!()|&()&)', unexpected '(' at 
pos 2", "Errors should be the same") }) } @@ -60,103 +54,106 @@ func TestFilter(t *testing.T) { t.Parallel() t.Run("ParserIdentifiesAllKindOfFilters", func(t *testing.T) { - rule, err := Parse("foo=bar") + rule, err := ParseFilter("foo=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &Equal{}, rule) - rule, err = Parse("foo!=bar") + rule, err = ParseFilter("foo!=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &UnEqual{}, rule) - rule, err = Parse("foo=bar*") + rule, err = ParseFilter("foo=bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &Like{}, rule) - rule, err = Parse("foo!=bar*") + rule, err = ParseFilter("foo!=bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &Unlike{}, rule) - rule, err = Parse("foobar") + rule, err = ParseFilter("foo>bar") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &GreaterThan{}, rule) - rule, err = Parse("foo>=bar") + rule, err = ParseFilter("foo>=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &GreaterThanOrEqual{}, rule) - rule, err = Parse("foo=bar&bar=foo") + rule, err = ParseFilter("foo=bar&bar=foo") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &All{}, rule) - rule, err = Parse("foo=bar|bar=foo") + rule, err = ParseFilter("foo=bar|bar=foo") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &Any{}, rule) - rule, err = Parse("!(foo=bar|bar=foo)") + rule, err = ParseFilter("!(foo=bar|bar=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.IsType(t, &None{}, rule) - rule, err = Parse("!foo") + rule, err = ParseFilter("!foo") assert.Nil(t, err, "There should be no errors but got: %s", err) - assert.Equal(t, &None{rules: []Filter{NewExists("foo")}}, rule) + exists, _ := NewExists("foo") + assert.Equal(t, 
&None{Filters: []Filter{exists}}, rule) - rule, err = Parse("foo") + rule, err = ParseFilter("foo") assert.Nil(t, err, "There should be no errors but got: %s", err) - assert.Equal(t, NewExists("foo"), rule) + assert.Equal(t, exists, rule) - rule, err = Parse("!(foo=bar|bar=foo)&(foo=bar|bar=foo)") + rule, err = ParseFilter("!(foo=bar|bar=foo)&(foo=bar|bar=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected := &All{rules: []Filter{ - &None{rules: []Filter{ - &Equal{column: "foo", value: "bar"}, - &Equal{column: "bar", value: "foo"}, + expected := &All{Filters: []Filter{ + &Any{Filters: []Filter{ + &Equal{Column: "bar", Value: "foo"}, + &Equal{Column: "foo", Value: "bar"}, }}, - &Any{rules: []Filter{ - &Equal{column: "foo", value: "bar"}, - &Equal{column: "bar", value: "foo"}, + &None{Filters: []Filter{ + &Any{Filters: []Filter{ + &Equal{Column: "bar", Value: "foo"}, + &Equal{Column: "foo", Value: "bar"}, + }}, }}, }} assert.Equal(t, expected, rule) }) t.Run("ParserIdentifiesSingleCondition", func(t *testing.T) { - rule, err := Parse("foo=bar") + rule, err := ParseFilter("foo=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected := &Equal{column: "foo", value: "bar"} + expected := &Equal{Column: "foo", Value: "bar"} assert.Equal(t, expected, rule, "Parser doesn't parse single condition correctly") }) t.Run("UrlEncodedFilterExpression", func(t *testing.T) { - rule, err := Parse("col%3Cumnval%28ue") + rule, err = ParseFilter("col%28umn>val%28ue") assert.Nil(t, err, "There should be no errors but got: %s", err) - assert.Equal(t, &GreaterThan{column: "col(umn", value: "val(ue"}, rule) + assert.Equal(t, &GreaterThan{Column: "col(umn", Value: "val(ue"}, rule) - rule, err = Parse("col%29umn>=val%29ue") + rule, err = ParseFilter("col%29umn>=val%29ue") assert.Nil(t, err, "There should be no errors but got: %s", err) - assert.Equal(t, &GreaterThanOrEqual{column: "col)umn", value: "val)ue"}, rule) + assert.Equal(t, 
&GreaterThanOrEqual{Column: "col)umn", Value: "val)ue"}, rule) }) } @@ -193,7 +190,7 @@ func FuzzParser(f *testing.F) { f.Add("col%29umn>val%29ue") f.Fuzz(func(t *testing.T, expr string) { - _, err := Parse(expr) + _, err := ParseFilter(expr) if strings.Count(expr, "(") != strings.Count(expr, ")") { assert.Error(t, err) diff --git a/internal/filter/types.go b/internal/filter/types.go index f5559c50f..bed083d22 100644 --- a/internal/filter/types.go +++ b/internal/filter/types.go @@ -1,12 +1,26 @@ package filter -// All represents a filter chain type that matches when all of its Rules matches. +import ( + "fmt" + "net/url" + "strings" +) + +// All represents a filter chain type that matches when all of its Filters match. type All struct { - rules []Filter + Filters []Filter +} + +func (a *All) Rules() []Filter { + return a.Filters +} + +func (a *All) Add(rule ...Filter) { + a.Filters = append(a.Filters, rule...) } func (a *All) Eval(filterable Filterable) bool { - for _, rule := range a.rules { + for _, rule := range a.Filters { if !rule.Eval(filterable) { return false } @@ -15,13 +29,21 @@ func (a *All) Eval(filterable Filterable) bool { return true } -// Any represents a filter chain type that matches when at least one of its Rules matches. +// Any represents a filter chain type that matches when at least one of its Filters matches. type Any struct { - rules []Filter + Filters []Filter +} + +func (a *Any) Rules() []Filter { + return a.Filters +} + +func (a *Any) Add(rule ...Filter) { + a.Filters = append(a.Filters, rule...) } func (a *Any) Eval(filterable Filterable) bool { - for _, rule := range a.rules { + for _, rule := range a.Filters { if rule.Eval(filterable) { return true } @@ -30,13 +52,21 @@ func (a *Any) Eval(filterable Filterable) bool { return false } -// None represents a filter chain type that matches when none of its Rules matches. +// None represents a filter chain type that matches when none of its Filters matches.
type None struct { - rules []Filter + Filters []Filter +} + +func (n *None) Rules() []Filter { + return n.Filters +} + +func (n *None) Add(rule ...Filter) { + n.Filters = append(n.Filters, rule...) } func (n *None) Eval(filterable Filterable) bool { - for _, rule := range n.rules { + for _, rule := range n.Filters { if rule.Eval(filterable) { return false } @@ -45,23 +75,73 @@ func (n *None) Eval(filterable Filterable) bool { return true } +func NewChain(rule Filter, op string) (Filter, error) { + switch op { + case "!": + if _, ok := rule.(*None); ok { + return rule, nil + } + + return &None{Filters: []Filter{rule}}, nil + case "&": + if _, ok := rule.(*All); ok { + return rule, nil + } + + return &All{Filters: []Filter{rule}}, nil + case "|": + if _, ok := rule.(*Any); ok { + return rule, nil + } + + return &Any{Filters: []Filter{rule}}, nil + default: + return nil, fmt.Errorf("invalid operator %s provided", op) + } +} + // Condition represents a single filter condition. type Condition struct { column string value string } -func NewCondition(column string, value string) *Condition { - return &Condition{ - column: column, - value: value, +func NewCondition(column string, operator string, value string) (Filter, error) { + switch operator { + case "=": + if strings.Contains(value, "*") { + return &Like{column: column, value: value}, nil + } + + return &Equal{column: column, value: value}, nil + case "!=": + if strings.Contains(value, "*") { + return &Unlike{column: column, value: value}, nil + } + + return &UnEqual{column: column, value: value}, nil + case ">": + return &GreaterThan{column: column, value: value}, nil + case ">=": + return &GreaterThanOrEqual{column: column, value: value}, nil + case "<": + return &LessThan{column: column, value: value}, nil + case "<=": + return &LessThanOrEqual{column: column, value: value}, nil + default: + return nil, fmt.Errorf("invalid operator %s provided", operator) } } type Exists Condition -func NewExists(column string) 
*Exists { - return &Exists{column: column} +func NewExists(column string) (*Exists, error) { + escaped, err := url.QueryUnescape(column) + if err != nil { + return nil, err + } + + return &Exists{column: escaped}, nil } func (e *Exists) Eval(filterable Filterable) bool {