From 35b25224ec197b45f887e5d71ed546ee2d382721 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosso Date: Tue, 5 Nov 2024 11:14:43 +0000 Subject: [PATCH 01/11] feat(transaction): data type prediction for lazy predictions --- internal/corazarules/rule_match.go | 2 + types/value_metadata.go | 84 ++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 types/value_metadata.go diff --git a/internal/corazarules/rule_match.go b/internal/corazarules/rule_match.go index 67f8f7dc1..2ccf5b988 100644 --- a/internal/corazarules/rule_match.go +++ b/internal/corazarules/rule_match.go @@ -29,6 +29,8 @@ type MatchData struct { // Keeps track of the chain depth in which the data matched. // Multiphase specific field ChainLevel_ int + // Metadata of the matched data + Metadata_ types.DataMetadataList } var _ types.MatchData = (*MatchData)(nil) diff --git a/types/value_metadata.go b/types/value_metadata.go new file mode 100644 index 000000000..b02a74cb8 --- /dev/null +++ b/types/value_metadata.go @@ -0,0 +1,84 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 +package types + +import "unicode" + +// ValueMetadata is the type of metadata that a value can have. +type ValueMetadata int + +const ( + // ValueMetadataAlphanumeric represents an alphanumeric value. + ValueMetadataAlphanumeric ValueMetadata = iota + // ValueMetadataAscii represents an ASCII value. + ValueMetadataAscii + // ValueMetadataBase64 represents a base64 value. + ValueMetadataBase64 + // ValueMetadataURI represents a URI value. + ValueMetadataURI + // ValueMetadataDomain represents a domain value. + ValueMetadataDomain + // ValueMetadataNumeric represents a numeric value, either integer or float. + ValueMetadataNumeric + // ValueMetadataBoolean represents a boolean value. + ValueMetadataBoolean + // ValueMetadataUnicode represents a unicode value. + ValueMetadataUnicode +) + +// NewValueMetadata returns a new ValueMetadata from a string. +func NewValueMetadata(metadata string) (ValueMetadata, bool) { + switch metadata { + case "alphanumeric": + return ValueMetadataAlphanumeric, true + case "ascii": + return ValueMetadataAscii, true + case "base64": + return ValueMetadataBase64, true + case "uri": + return ValueMetadataURI, true + case "domain": + return ValueMetadataDomain, true + case "numeric": + return ValueMetadataNumeric, true + case "boolean": + return ValueMetadataBoolean, true + case "unicode": + return ValueMetadataUnicode, true + } + return 0, false +} + +// DataMetadataList is a list of ValueMetadata. +type DataMetadataList struct { + metadata map[ValueMetadata]bool + testedTypes []ValueMetadata +} + +func (v *DataMetadataList) Test(data string, metadataType ValueMetadata) bool { + result, ok := v.metadata[metadataType] + if !ok { + // we do the analysis only once + switch metadataType { + case ValueMetadataAlphanumeric: + return v.testAlphanumeric(data) + default: + // this should not happen + return false + } + } + return result + +} + +func (v *DataMetadataList) testAlphanumeric(data string) bool { + res := true + for _, c := range data { + if !unicode.IsLetter(c) && !unicode.IsNumber(c) { + res = false + break + } + } + v.metadata[ValueMetadataAlphanumeric] = res + return res +} From 97ecf2cc31264669417d93646a5dc5e2804343fa Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosso Date: Tue, 5 Nov 2024 11:45:54 +0000 Subject: [PATCH 02/11] small fix --- types/value_metadata.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/types/value_metadata.go b/types/value_metadata.go index b02a74cb8..85be47130 100644 --- a/types/value_metadata.go +++ b/types/value_metadata.go @@ -4,12 +4,12 @@ package types import "unicode" -// ValueMetadata is the type of metadata that a value can have. -type ValueMetadata int +// DataMetadata is the type of metadata that a value can have. +type DataMetadata int const ( // ValueMetadataAlphanumeric represents an alphanumeric value. - ValueMetadataAlphanumeric ValueMetadata = iota + ValueMetadataAlphanumeric DataMetadata = iota // ValueMetadataAscii represents an ASCII value. ValueMetadataAscii // ValueMetadataBase64 represents a base64 value. @@ -27,7 +27,7 @@ const ( ) // NewValueMetadata returns a new ValueMetadata from a string. -func NewValueMetadata(metadata string) (ValueMetadata, bool) { +func NewValueMetadata(metadata string) (DataMetadata, bool) { switch metadata { case "alphanumeric": return ValueMetadataAlphanumeric, true @@ -51,11 +51,10 @@ func NewValueMetadata(metadata string) (ValueMetadata, bool) { // DataMetadataList is a list of ValueMetadata. type DataMetadataList struct { - metadata map[ValueMetadata]bool - testedTypes []ValueMetadata + metadata map[DataMetadata]bool } -func (v *DataMetadataList) Test(data string, metadataType ValueMetadata) bool { +func (v *DataMetadataList) Test(data string, metadataType DataMetadata) bool { result, ok := v.metadata[metadataType] if !ok { // we do the analysis only once From d5f40d0a9ef6b3c022614891fe6545c926d5f093 Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Wed, 6 Nov 2024 10:30:30 +0000 Subject: [PATCH 03/11] Add experimental MatchData --- experimental/types/rule_match.go | 55 ++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 experimental/types/rule_match.go diff --git a/experimental/types/rule_match.go b/experimental/types/rule_match.go new file mode 100644 index 000000000..fe36b4bf7 --- /dev/null +++ b/experimental/types/rule_match.go @@ -0,0 +1,55 @@ +// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package types + +import ( + "github.com/corazawaf/coraza/v3/types" + "github.com/corazawaf/coraza/v3/types/variables" +) + +// MatchData works like VariableKey but is used for logging, +// so it contains the collection as a string, and it's value +type MatchData interface { + // Variable + Variable() variables.RuleVariable + // Key of the variable, blank if no key is required + Key() string + // Value of the current VARIABLE:KEY + Value() string + // Message is the expanded macro message + Message() string + // Data is the expanded logdata of the macro + Data() string + // Chain depth of variable match + ChainLevel() int + // Metadata of the matched data + Metadata() types.DataMetadataList +} + +// MatchedRule contains a list of macro expanded messages, +// matched variables and a pointer to the rule +type MatchedRule interface { + // Message is the macro expanded message + Message() string + // Data is the macro expanded logdata + Data() string + // URI is the full request uri unparsed + URI() string + // TransactionID is the transaction ID + TransactionID() string + // Disruptive is whether this rule will perform disruptive actions (note also pass, allow, redirect are considered disruptive actions) + Disruptive() bool + // ServerIPAddress is the address of the server + ServerIPAddress() string + // ClientIPAddress is the address of the client + ClientIPAddress() string + // MatchedDatas is the matched variables. + MatchedDatas() []MatchData + + Rule() types.RuleMetadata + + AuditLog() string + + ErrorLog() string +} From 22f8061a4547d3e9c8321ef808fd47a9b74d1c18 Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Wed, 6 Nov 2024 21:26:14 +0000 Subject: [PATCH 04/11] experimental metadata package --- experimental/collection/collection.go | 66 +++++++++++++++++++ experimental/plugins/macro/macro.go | 2 +- experimental/plugins/plugintypes/rule.go | 4 +- .../plugins/plugintypes/transaction.go | 2 +- experimental/types/rule_match.go | 30 +-------- .../types}/value_metadata.go | 0 internal/actions/setvar.go | 2 +- internal/actions/setvar_test.go | 2 +- internal/collections/concat.go | 4 +- internal/collections/concat_test.go | 2 +- internal/collections/map.go | 4 +- internal/collections/named.go | 4 +- internal/collections/noop.go | 4 +- internal/collections/single.go | 4 +- internal/collections/sized.go | 4 +- internal/corazarules/rule_match.go | 16 ++++- internal/corazarules/rule_match_test.go | 2 +- internal/corazawaf/rule.go | 11 ++-- internal/corazawaf/rule_multiphase.go | 24 ++++--- internal/corazawaf/rule_test.go | 19 +++--- internal/corazawaf/transaction.go | 22 +++---- internal/corazawaf/transaction_test.go | 7 +- 22 files changed, 147 insertions(+), 88 deletions(-) create mode 100644 experimental/collection/collection.go rename {types => experimental/types}/value_metadata.go (100%) diff --git a/experimental/collection/collection.go b/experimental/collection/collection.go new file mode 100644 index 000000000..5ae5e1746 --- /dev/null +++ b/experimental/collection/collection.go @@ -0,0 +1,66 @@ +// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package collection + +import ( + "regexp" + + "github.com/corazawaf/coraza/v3/experimental/types" +) + +// Collection are used to store VARIABLE data +// for transactions, this data structured is designed +// to store slices of data for keys +// Important: CollectionMaps ARE NOT concurrent safe +type Collection interface { + // FindAll returns matches for all the items in this Collection. + FindAll() []types.MatchData + + // Name returns the name for the current CollectionMap + Name() string +} + +// Single is a Collection with a single element. +type Single interface { + Collection + + // Get returns the value of this Single + Get() string +} + +// Keyed is a Collection with elements that can be selected by key. +type Keyed interface { + Collection + + // Get returns a slice of strings for a key + Get(key string) []string + + // FindRegex returns a slice of MatchData for the regex + FindRegex(key *regexp.Regexp) []types.MatchData + + // FindString returns a slice of MatchData for the string + FindString(key string) []types.MatchData +} + +// Map are used to store VARIABLE data +// for transactions, this data structured is designed +// to store slices of data for keys +// Important: CollectionMaps ARE NOT concurrent safe +type Map interface { + Keyed + + // Add a value to some key + Add(key string, value string) + + // Set will replace the key's value with this slice + Set(key string, values []string) + + // SetIndex will place the value under the index + // If the index is higher than the current size of the CollectionMap + // it will be appended + SetIndex(key string, index int, value string) + + // Remove deletes the key from the CollectionMap + Remove(key string) +} diff --git a/experimental/plugins/macro/macro.go b/experimental/plugins/macro/macro.go index fabd039d3..e79e71522 100644 --- a/experimental/plugins/macro/macro.go +++ b/experimental/plugins/macro/macro.go @@ -8,7 +8,7 @@ import ( "fmt" "strings" - "github.com/corazawaf/coraza/v3/collection" + "github.com/corazawaf/coraza/v3/experimental/collection" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/experimental/plugins/plugintypes/rule.go b/experimental/plugins/plugintypes/rule.go index 765d3573d..7d8b92fb5 100644 --- a/experimental/plugins/plugintypes/rule.go +++ b/experimental/plugins/plugintypes/rule.go @@ -3,7 +3,9 @@ package plugintypes -import "github.com/corazawaf/coraza/v3/types" +import ( + "github.com/corazawaf/coraza/v3/types" +) // Rule is a rule executed against a transaction. type Rule interface { diff --git a/experimental/plugins/plugintypes/transaction.go b/experimental/plugins/plugintypes/transaction.go index a34a4732a..1abb57cf2 100644 --- a/experimental/plugins/plugintypes/transaction.go +++ b/experimental/plugins/plugintypes/transaction.go @@ -4,8 +4,8 @@ package plugintypes import ( - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/debuglog" + "github.com/corazawaf/coraza/v3/experimental/collection" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/experimental/types/rule_match.go b/experimental/types/rule_match.go index fe36b4bf7..ed58e4d4e 100644 --- a/experimental/types/rule_match.go +++ b/experimental/types/rule_match.go @@ -4,7 +4,6 @@ package types import ( - "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -24,32 +23,5 @@ type MatchData interface { // Chain depth of variable match ChainLevel() int // Metadata of the matched data - Metadata() types.DataMetadataList -} - -// MatchedRule contains a list of macro expanded messages, -// matched variables and a pointer to the rule -type MatchedRule interface { - // Message is the macro expanded message - Message() string - // Data is the macro expanded logdata - Data() string - // URI is the full request uri unparsed - URI() string - // TransactionID is the transaction ID - TransactionID() string - // Disruptive is whether this rule will perform disruptive actions (note also pass, allow, redirect are considered disruptive actions) - Disruptive() bool - // ServerIPAddress is the address of the server - ServerIPAddress() string - // ClientIPAddress is the address of the client - ClientIPAddress() string - // MatchedDatas is the matched variables. - MatchedDatas() []MatchData - - Rule() types.RuleMetadata - - AuditLog() string - - ErrorLog() string + Metadata() DataMetadataList } diff --git a/types/value_metadata.go b/experimental/types/value_metadata.go similarity index 100% rename from types/value_metadata.go rename to experimental/types/value_metadata.go diff --git a/internal/actions/setvar.go b/internal/actions/setvar.go index 399921772..bab778318 100644 --- a/internal/actions/setvar.go +++ b/internal/actions/setvar.go @@ -8,7 +8,7 @@ import ( "strconv" "strings" - "github.com/corazawaf/coraza/v3/collection" + "github.com/corazawaf/coraza/v3/experimental/collection" "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/types/variables" diff --git a/internal/actions/setvar_test.go b/internal/actions/setvar_test.go index ff73a4193..76ab5afca 100644 --- a/internal/actions/setvar_test.go +++ b/internal/actions/setvar_test.go @@ -8,8 +8,8 @@ import ( "strings" "testing" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/debuglog" + "github.com/corazawaf/coraza/v3/experimental/collection" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazawaf" ) diff --git a/internal/collections/concat.go b/internal/collections/concat.go index f54489e0d..eaa557e9d 100644 --- a/internal/collections/concat.go +++ b/internal/collections/concat.go @@ -7,9 +7,9 @@ import ( "regexp" "strings" - "github.com/corazawaf/coraza/v3/collection" + "github.com/corazawaf/coraza/v3/experimental/collection" + "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/corazarules" - "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/concat_test.go b/internal/collections/concat_test.go index 77761e4b7..8a27c8000 100644 --- a/internal/collections/concat_test.go +++ b/internal/collections/concat_test.go @@ -8,7 +8,7 @@ import ( "strings" "testing" - "github.com/corazawaf/coraza/v3/types" + "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/map.go b/internal/collections/map.go index 069c8e6a9..d9312b19d 100644 --- a/internal/collections/map.go +++ b/internal/collections/map.go @@ -7,9 +7,9 @@ import ( "regexp" "strings" - "github.com/corazawaf/coraza/v3/collection" + "github.com/corazawaf/coraza/v3/experimental/collection" + "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/corazarules" - "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/named.go b/internal/collections/named.go index 88d9166fd..5ac93ffb0 100644 --- a/internal/collections/named.go +++ b/internal/collections/named.go @@ -8,9 +8,9 @@ import ( "regexp" "strings" - "github.com/corazawaf/coraza/v3/collection" + "github.com/corazawaf/coraza/v3/experimental/collection" + "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/corazarules" - "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/noop.go b/internal/collections/noop.go index ff4ad0485..32ed622e7 100644 --- a/internal/collections/noop.go +++ b/internal/collections/noop.go @@ -4,8 +4,8 @@ package collections import ( - "github.com/corazawaf/coraza/v3/collection" - "github.com/corazawaf/coraza/v3/types" + "github.com/corazawaf/coraza/v3/experimental/collection" + "github.com/corazawaf/coraza/v3/experimental/types" ) var Noop collection.Collection = &noop{} diff --git a/internal/collections/single.go b/internal/collections/single.go index 7c2038fc0..7703f69d7 100644 --- a/internal/collections/single.go +++ b/internal/collections/single.go @@ -7,9 +7,9 @@ import ( "fmt" "strings" - "github.com/corazawaf/coraza/v3/collection" + "github.com/corazawaf/coraza/v3/experimental/collection" + "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/corazarules" - "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/sized.go b/internal/collections/sized.go index 146395ddc..6cbca547b 100644 --- a/internal/collections/sized.go +++ b/internal/collections/sized.go @@ -9,9 +9,9 @@ import ( "strconv" "strings" - "github.com/corazawaf/coraza/v3/collection" + "github.com/corazawaf/coraza/v3/experimental/collection" + "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/corazarules" - "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/corazarules/rule_match.go b/internal/corazarules/rule_match.go index 2ccf5b988..5487b8bb4 100644 --- a/internal/corazarules/rule_match.go +++ b/internal/corazarules/rule_match.go @@ -8,7 +8,9 @@ import ( "fmt" "strconv" "strings" + "unsafe" + experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -30,11 +32,13 @@ type MatchData struct { // Multiphase specific field ChainLevel_ int // Metadata of the matched data - Metadata_ types.DataMetadataList + Metadata_ experimentalTypes.DataMetadataList } var _ types.MatchData = (*MatchData)(nil) +var _ experimentalTypes.MatchData = (*MatchData)(nil) + func (m *MatchData) Variable() variables.RuleVariable { return m.Variable_ } @@ -59,6 +63,10 @@ func (m *MatchData) ChainLevel() int { return m.ChainLevel_ } +func (m *MatchData) Metadata() experimentalTypes.DataMetadataList { + return m.Metadata_ +} + // ActionName is used to identify an action. type DisruptiveAction int @@ -102,7 +110,7 @@ type MatchedRule struct { // Client IP address ClientIPAddress_ string // A slice of matched variables - MatchedDatas_ []types.MatchData + MatchedDatas_ []experimentalTypes.MatchData Rule_ types.RuleMetadata @@ -144,6 +152,10 @@ func (mr *MatchedRule) ClientIPAddress() string { } func (mr *MatchedRule) MatchedDatas() []types.MatchData { + return *(*[]types.MatchData)(unsafe.Pointer(&mr.MatchedDatas_)) +} + +func (mr *MatchedRule) MatchedDatasExperimental_() []experimentalTypes.MatchData { return mr.MatchedDatas_ } diff --git a/internal/corazarules/rule_match_test.go b/internal/corazarules/rule_match_test.go index 3c0dad40f..ba7161b16 100644 --- a/internal/corazarules/rule_match_test.go +++ b/internal/corazarules/rule_match_test.go @@ -7,8 +7,8 @@ import ( "strings" "testing" + "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/variables" - "github.com/corazawaf/coraza/v3/types" ) func TestErrorLogMessagesSizesNoExtraRuleDetails(t *testing.T) { diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index cd6cd6269..8ec69fa13 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -13,6 +13,7 @@ import ( "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/internal/memoize" "github.com/corazawaf/coraza/v3/types" @@ -163,7 +164,7 @@ const chainLevelZero = 0 // the matched variables, keys and values (MatchData) func (r *Rule) Evaluate(phase types.RulePhase, tx plugintypes.TransactionState, cache map[transformationKey]*transformationValue) { // collectiveMatchedValues lives across recursive calls of doEvaluate - var collectiveMatchedValues []types.MatchData + var collectiveMatchedValues []experimentalTypes.MatchData logger := tx.DebugLogger() @@ -180,14 +181,14 @@ func (r *Rule) Evaluate(phase types.RulePhase, tx plugintypes.TransactionState, const noID = 0 -func (r *Rule) doEvaluate(logger debuglog.Logger, phase types.RulePhase, tx *Transaction, collectiveMatchedValues *[]types.MatchData, chainLevel int, cache map[transformationKey]*transformationValue) []types.MatchData { +func (r *Rule) doEvaluate(logger debuglog.Logger, phase types.RulePhase, tx *Transaction, collectiveMatchedValues *[]experimentalTypes.MatchData, chainLevel int, cache map[transformationKey]*transformationValue) []experimentalTypes.MatchData { tx.Capture = r.Capture if multiphaseEvaluation { computeRuleChainMinPhase(r) } - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData // we log if we are the parent rule logger.Debug().Msg("Evaluating rule") defer logger.Debug().Msg("Finished rule evaluation") @@ -226,7 +227,7 @@ func (r *Rule) doEvaluate(logger debuglog.Logger, phase types.RulePhase, tx *Tra if multiphaseEvaluation && multiphaseSkipVariable(r, v.Variable, phase) { continue } - var values []types.MatchData + var values []experimentalTypes.MatchData for _, c := range ecol { if c.Variable == v.Variable { // TODO shall we check the pointer? @@ -381,7 +382,7 @@ func (r *Rule) doEvaluate(logger debuglog.Logger, phase types.RulePhase, tx *Tra return matchedValues } -func (r *Rule) transformArg(arg types.MatchData, argIdx int, cache map[transformationKey]*transformationValue) ([]string, []error) { +func (r *Rule) transformArg(arg experimentalTypes.MatchData, argIdx int, cache map[transformationKey]*transformationValue) ([]string, []error) { if r.MultiMatch { // TODOs: // - We don't need to run every transformation. We could try for each until found diff --git a/internal/corazawaf/rule_multiphase.go b/internal/corazawaf/rule_multiphase.go index 00033a837..2c5b0af6d 100644 --- a/internal/corazawaf/rule_multiphase.go +++ b/internal/corazawaf/rule_multiphase.go @@ -5,7 +5,9 @@ package corazawaf import ( "strings" + "unsafe" + experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -281,18 +283,18 @@ func multiphaseSkipVariable(r *Rule, variable variables.RuleVariable, phase type // REQUEST_URI - REQUEST_URI - REQUEST_HEADERS // REQUEST_URI - REQUEST_HEADERS - REQUEST_BODY // REQUEST_URI - REQUEST_HEADERS - REQUEST_HEADERS -func generateChainMatches(tx *Transaction, matchedValues []types.MatchData, currentDepth int, buildingMatchedChain []types.MatchData, matchedChainsResult *[][]types.MatchData) { +func generateChainMatches(tx *Transaction, matchedValues []experimentalTypes.MatchData, currentDepth int, buildingMatchedChain []experimentalTypes.MatchData, matchedChainsResult *[][]experimentalTypes.MatchData) { finalDepth := matchedChainDepth(matchedValues) // Iterate the variables based on the chain level (first all the variables at level 0, then all the variables at level 1, etc.) for _, mv := range matchedValues { if mv.ChainLevel() == currentDepth { - var localebuildingMatchedChain []types.MatchData + var localebuildingMatchedChain []experimentalTypes.MatchData if buildingMatchedChain == nil { - localebuildingMatchedChain = []types.MatchData{} + localebuildingMatchedChain = []experimentalTypes.MatchData{} } else { - localebuildingMatchedChain = make([]types.MatchData, len(buildingMatchedChain)) + localebuildingMatchedChain = make([]experimentalTypes.MatchData, len(buildingMatchedChain)) copy(localebuildingMatchedChain, buildingMatchedChain) } localebuildingMatchedChain = append(localebuildingMatchedChain, mv) @@ -312,17 +314,19 @@ func generateChainMatches(tx *Transaction, matchedValues []types.MatchData, curr // Currently, it is intended for chained matches because the same variables are evaluated multiple times and not // constained to the min phase. If the same match is found, the actions of the most inner rule are skipped and the match // is not added to matchedValues (and removed from collectiveMatchedValues) -func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rule, collectiveMatchedValues *[]types.MatchData, mr types.MatchData) bool { +func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rule, collectiveMatchedValues *[]experimentalTypes.MatchData, mr experimentalTypes.MatchData) bool { *collectiveMatchedValues = append(*collectiveMatchedValues, mr) for _, matchedRule := range tx.matchedRules { - if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedRule.MatchedDatas()) == matchedChainDepth(*collectiveMatchedValues) { + matchedData := matchedRule.MatchedDatas() + matchedDataExp := *(*[]experimentalTypes.MatchData)(unsafe.Pointer(&matchedData)) + if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedDataExp) == matchedChainDepth(*collectiveMatchedValues) { // This might be a double match, let's generate the chains that aready matched and the one that just matched // let's see if all the latter already matched. // generateChainMatches generates matched chains based on the matchedValues and populates matchedChains and collectiveMatchedChains variables - var matchedChains, collectiveMatchedChains [][]types.MatchData - generateChainMatches(tx, matchedRule.MatchedDatas(), 0, nil, &matchedChains) + var matchedChains, collectiveMatchedChains [][]experimentalTypes.MatchData + generateChainMatches(tx, matchedDataExp, 0, nil, &matchedChains) generateChainMatches(tx, *collectiveMatchedValues, 0, nil, &collectiveMatchedChains) // Check if a newly matched chain (part of collectiveMatchedChain) already matched @@ -359,7 +363,7 @@ func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rul } // chainPartOf checks if a chain is part of a list of already matched chains -func chainPartOf(newMatchedChain []types.MatchData, matchedChains [][]types.MatchData) bool { +func chainPartOf(newMatchedChain []experimentalTypes.MatchData, matchedChains [][]experimentalTypes.MatchData) bool { for _, matchedChain := range matchedChains { var differentMatch bool for n, newMatchedValue := range newMatchedChain { @@ -378,7 +382,7 @@ func chainPartOf(newMatchedChain []types.MatchData, matchedChains [][]types.Matc } // matchedChainDepth returns the depth of a matched chain returning the lowest chain level between all the the matched values -func matchedChainDepth(datas []types.MatchData) int { +func matchedChainDepth(datas []experimentalTypes.MatchData) int { depth := 0 for _, matchedValue := range datas { if matchedValue.ChainLevel() > depth { diff --git a/internal/corazawaf/rule_test.go b/internal/corazawaf/rule_test.go index 06d9c219e..a1370ef04 100644 --- a/internal/corazawaf/rule_test.go +++ b/internal/corazawaf/rule_test.go @@ -11,6 +11,7 @@ import ( "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" @@ -32,7 +33,7 @@ func TestMatchEvaluate(t *testing.T) { tx := NewWAF().NewTransaction() tx.AddGetRequestArgument("test", "0") - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData matchdata := r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if len(matchdata) != 1 { t.Errorf("Expected 1 matchdata from a SecActions rule, got %d", len(matchdata)) @@ -56,7 +57,7 @@ func TestNoMatchEvaluate(t *testing.T) { tx := NewWAF().NewTransaction() tx.AddGetRequestArgument("test", "999") - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData matchdata := r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if len(matchdata) != 0 { t.Errorf("Expected 0 matchdata from a SecActions rule, got %d", len(matchdata)) @@ -102,7 +103,7 @@ func TestNoMatchEvaluateBecauseOfException(t *testing.T) { tx := NewWAF().NewTransaction() tx.AddGetRequestArgument("test", "0") tx.RemoveRuleTargetByID(1, tc.variable, "test") - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData matchdata := r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if len(matchdata) != 0 { t.Errorf("Expected 0 matchdata, got %d", len(matchdata)) @@ -139,7 +140,7 @@ func TestFlowActionIfDetectionOnlyEngine(t *testing.T) { tx := NewWAF().NewTransaction() tx.RuleEngine = types.RuleEngineDetectionOnly - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData matchdata := r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if len(matchdata) != 1 { t.Errorf("Expected 1 matchdata, got %d", len(matchdata)) @@ -193,7 +194,7 @@ func TestDisruptiveActionFromChainNotEvaluated(t *testing.T) { r.Chain = chainedRule tx := NewWAF().NewTransaction() - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData matchdata := r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if len(matchdata) != 2 { t.Errorf("Expected 2 matchdata from a SecActions chained rule (total 2 rules), got %d", len(matchdata)) @@ -212,7 +213,7 @@ func TestRuleDetailsTransferredToTransaction(t *testing.T) { r.operator = nil tx := NewWAF().NewTransaction() - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if tx.variables.rule.Get("id")[0] != strconv.Itoa(r.ParentID()) { t.Errorf("Expected id: %d (parent id), got %s", r.ParentID(), tx.variables.rule.Get("id")[0]) @@ -237,7 +238,7 @@ func TestSecActionMessagePropagationInMatchData(t *testing.T) { // SecAction uses nil operator r.operator = nil tx := NewWAF().NewTransaction() - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData matchdata := r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if len(matchdata) != 1 { t.Errorf("Expected 1 matchdata from a SecActions rule, got %d", len(matchdata)) @@ -566,7 +567,7 @@ func TestCaptureNotPropagatedToInnerChainRule(t *testing.T) { chainedRule.Capture = false r.Chain = chainedRule tx := NewWAF().NewTransaction() - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) // We expect that capture is false after doEvaluate. if tx.Capture { @@ -604,7 +605,7 @@ func TestExpandMacroAfterWholeRuleEvaluation(t *testing.T) { tx.ProcessURI("0", "GET", "HTTP/1.1") tx.AddGetRequestArgument("test", "0") - var matchedValues []types.MatchData + var matchedValues []experimentalTypes.MatchData matchdata := r.doEvaluate(debuglog.Noop(), types.PhaseRequestHeaders, tx, &matchedValues, 0, tx.transformationCache) if len(matchdata) != 2 { t.Errorf("Expected 2 matchdata from a chained rule (total 2 rules), got %d", len(matchdata)) diff --git a/internal/corazawaf/transaction.go b/internal/corazawaf/transaction.go index 7fce408f3..95d12eb50 100644 --- a/internal/corazawaf/transaction.go +++ b/internal/corazawaf/transaction.go @@ -18,9 +18,10 @@ import ( "strings" "time" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/debuglog" + "github.com/corazawaf/coraza/v3/experimental/collection" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/auditlog" "github.com/corazawaf/coraza/v3/internal/bodyprocessors" "github.com/corazawaf/coraza/v3/internal/collections" @@ -486,7 +487,7 @@ func (tx *Transaction) matchVariable(match *corazarules.MatchData) { } // MatchRule Matches a rule to be logged -func (tx *Transaction) MatchRule(r *Rule, mds []types.MatchData) { +func (tx *Transaction) MatchRule(r *Rule, mds []experimentalTypes.MatchData) { tx.debugLogger.Debug().Int("rule_id", r.ID_).Msg("Rule matched") // tx.MatchedRules = append(tx.MatchedRules, mr) @@ -561,13 +562,13 @@ func (tx *Transaction) GetStopWatch() string { // GetField Retrieve data from collections applying exceptions // In future releases we may remove the exceptions slice and // make it easier to use -func (tx *Transaction) GetField(rv ruleVariableParams) []types.MatchData { +func (tx *Transaction) GetField(rv ruleVariableParams) []experimentalTypes.MatchData { col := tx.Collection(rv.Variable) if col == nil { - return []types.MatchData{} + return []experimentalTypes.MatchData{} } - var matches []types.MatchData + var matches []experimentalTypes.MatchData // Now that we have access to the collection, we can apply the exceptions switch { case rv.KeyRx != nil: @@ -588,7 +589,7 @@ func (tx *Transaction) GetField(rv ruleVariableParams) []types.MatchData { // in the most common scenario filteredMatches length will be // the same as matches length, so we avoid allocating per result - filteredMatches := make([]types.MatchData, 0, len(matches)) + filteredMatches := make([]experimentalTypes.MatchData, 0, len(matches)) for _, c := range matches { isException := false @@ -600,14 +601,13 @@ func (tx *Transaction) GetField(rv ruleVariableParams) []types.MatchData { } } if !isException { - filteredMatches = append(filteredMatches, c) + filteredMatches = append(filteredMatches, c.(experimentalTypes.MatchData)) } } - matches = filteredMatches if rv.Count { - count := len(matches) - matches = []types.MatchData{ + count := len(filteredMatches) + filteredMatches = []experimentalTypes.MatchData{ &corazarules.MatchData{ Variable_: rv.Variable, Key_: rv.KeyStr, @@ -615,7 +615,7 @@ func (tx *Transaction) GetField(rv ruleVariableParams) []types.MatchData { }, } } - return matches + return filteredMatches } // RemoveRuleTargetByID Removes the VARIABLE:KEY from the rule ID diff --git a/internal/corazawaf/transaction_test.go b/internal/corazawaf/transaction_test.go index 0bb0c4c65..0bc5dcd07 100644 --- a/internal/corazawaf/transaction_test.go +++ b/internal/corazawaf/transaction_test.go @@ -13,10 +13,11 @@ import ( "strings" "testing" - "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/debuglog" + "github.com/corazawaf/coraza/v3/experimental/collection" "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/internal/collections" "github.com/corazawaf/coraza/v3/internal/corazarules" utils "github.com/corazawaf/coraza/v3/internal/strings" @@ -708,7 +709,7 @@ func TestAuditLogFields(t *testing.T) { rule := NewRule() rule.ID_ = 131 rule.Log = true - tx.MatchRule(rule, []types.MatchData{ + tx.MatchRule(rule, []experimentalTypes.MatchData{ &corazarules.MatchData{ Variable_: variables.UniqueID, }, @@ -849,7 +850,7 @@ func TestLogCallback(t *testing.T) { rule.Phase_ = 1 rule.Log = true _ = rule.AddAction("deny", testCase.action) - tx.MatchRule(rule, []types.MatchData{ + tx.MatchRule(rule, []experimentalTypes.MatchData{ &corazarules.MatchData{ Variable_: variables.UniqueID, }, From 225d1daf3943b144edd94317ac93e7387747301e Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Thu, 7 Nov 2024 12:22:07 +0000 Subject: [PATCH 05/11] Implement rule metadata filter --- experimental/types/value_metadata.go | 139 ++++++++++++++++++++++---- internal/actions/tag.go | 13 +++ internal/corazarules/rule_match.go | 9 +- internal/corazawaf/rule.go | 23 ++++- internal/corazawaf/rule_multiphase.go | 11 +- 5 files changed, 170 insertions(+), 25 deletions(-) diff --git a/experimental/types/value_metadata.go b/experimental/types/value_metadata.go index 85be47130..6b98fc54f 100644 --- a/experimental/types/value_metadata.go +++ b/experimental/types/value_metadata.go @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 package types -import "unicode" +import ( + "unicode" +) // DataMetadata is the type of metadata that a value can have. type DataMetadata int @@ -29,6 +31,10 @@ const ( // NewValueMetadata returns a new ValueMetadata from a string. func NewValueMetadata(metadata string) (DataMetadata, bool) { switch metadata { + case "numeric": + return ValueMetadataNumeric, true + case "boolean": + return ValueMetadataBoolean, true case "alphanumeric": return ValueMetadataAlphanumeric, true case "ascii": @@ -39,10 +45,6 @@ func NewValueMetadata(metadata string) (DataMetadata, bool) { return ValueMetadataURI, true case "domain": return ValueMetadataDomain, true - case "numeric": - return ValueMetadataNumeric, true - case "boolean": - return ValueMetadataBoolean, true case "unicode": return ValueMetadataUnicode, true } @@ -54,23 +56,34 @@ type DataMetadataList struct { metadata map[DataMetadata]bool } -func (v *DataMetadataList) Test(data string, metadataType DataMetadata) bool { - result, ok := v.metadata[metadataType] - if !ok { - // we do the analysis only once - switch metadataType { - case ValueMetadataAlphanumeric: - return v.testAlphanumeric(data) - default: - // this should not happen - return false +func (v *DataMetadataList) Evaluate(data string) { + // we do the analysis only once + if v.metadata == nil { + v.metadata = make(map[DataMetadata]bool) + for metadataType := range v.metadata { + switch metadataType { + case ValueMetadataNumeric: + v.evaluateNumeric(data) + case ValueMetadataBoolean: + v.evaluateBoolean(data) + case ValueMetadataAlphanumeric: + v.evaluateAlphanumeric(data) + case ValueMetadataAscii: + v.evaluateAscii(data) + case ValueMetadataBase64: + v.evaluateBase64(data) + // case ValueMetadataURI: + // result = result || v.evaluateURI(data) + // case ValueMetadataDomain: + // result = result || v.evaluateDomain(data) + // case ValueMetadataUnicode: + // result = result || v.evaluateUnicode(data) + } } } - return result - } -func (v *DataMetadataList) testAlphanumeric(data string) bool { +func (v *DataMetadataList) evaluateAlphanumeric(data string) bool { res := true for _, c := range data { if !unicode.IsLetter(c) && !unicode.IsNumber(c) { @@ -81,3 +94,93 @@ func (v *DataMetadataList) testAlphanumeric(data string) bool { v.metadata[ValueMetadataAlphanumeric] = res return res } + +func (v *DataMetadataList) evaluateAscii(data string) bool { + res := true + for i := 0; i < len(data); i++ { + if data[i] > unicode.MaxASCII { + res = false + break + } + } + v.metadata[ValueMetadataAscii] = res + return res +} + +func isBase64(c byte) bool { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '+' || c == '/' +} + +func (v *DataMetadataList) evaluateBase64(data string) bool { + res := true + for i := 0; i < len(data); i++ { + if !isBase64(data[i]) { + res = false + break + } + } + v.metadata[ValueMetadataBase64] = res + return res +} + +func (v *DataMetadataList) evaluateNumeric(data string) bool { + res := true + for _, c := range data { + if !unicode.IsNumber(c) { + res = false + break + } + } + v.metadata[ValueMetadataNumeric] = res + return res +} + +func (v *DataMetadataList) evaluateBoolean(data string) bool { + res := true + if data == "true" || data == "false" { + res = true + } + v.metadata[ValueMetadataBoolean] = res + return res +} + +func (v *DataMetadataList) TestNumeric() bool { + return v.metadata[ValueMetadataNumeric] +} + +func (v *DataMetadataList) TestBoolean() bool { + return v.metadata[ValueMetadataBoolean] +} + +func (v *DataMetadataList) TestAlphanumeric() bool { + return v.metadata[ValueMetadataAlphanumeric] +} + +func (v *DataMetadataList) TestAscii() bool { + return v.metadata[ValueMetadataAscii] +} + +func (v *DataMetadataList) TestBase64() bool { + return v.metadata[ValueMetadataBase64] +} + +func (v *DataMetadataList) TestURI() bool { + return v.metadata[ValueMetadataURI] +} + +func (v *DataMetadataList) TestDomain() bool { + return v.metadata[ValueMetadataDomain] +} + +func (v *DataMetadataList) TestUnicode() bool { + return v.metadata[ValueMetadataUnicode] +} + +func (v *DataMetadataList) Test(metadataTypes []DataMetadata) bool { + for _, metadataType := range metadataTypes { + if !v.metadata[metadataType] { + return false + } + } + return true +} diff --git a/internal/actions/tag.go b/internal/actions/tag.go index 8f911a030..800fc14b7 100644 --- a/internal/actions/tag.go +++ b/internal/actions/tag.go @@ -4,6 +4,9 @@ package actions import ( + "fmt" + "strings" + "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazawaf" ) @@ -30,6 +33,16 @@ func (a *tagFn) Init(r plugintypes.RuleMetadata, data string) error { return ErrMissingArguments } r.(*corazawaf.Rule).Tags_ = append(r.(*corazawaf.Rule).Tags_, data) + if strings.HasPrefix(data, "metadatafilter/") { + filters_string := strings.Split(data, "/") + filters := strings.Split(filters_string[1], ",") + for _, filter := range filters { + ok := r.(*corazawaf.Rule).AddAllowedMetadata(filter) + if ok != nil { + return fmt.Errorf("invalid metadata filter: %s", filter) + } + } + } return nil } diff --git a/internal/corazarules/rule_match.go b/internal/corazarules/rule_match.go index 5487b8bb4..fd6cb8ab6 100644 --- a/internal/corazarules/rule_match.go +++ b/internal/corazarules/rule_match.go @@ -8,7 +8,6 @@ import ( "fmt" "strconv" "strings" - "unsafe" experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" "github.com/corazawaf/coraza/v3/types" @@ -64,6 +63,8 @@ func (m *MatchData) ChainLevel() int { } func (m *MatchData) Metadata() experimentalTypes.DataMetadataList { + // Evaluate the metadata if it's not set + m.Metadata_.Evaluate(m.Value_) return m.Metadata_ } @@ -152,7 +153,11 @@ func (mr *MatchedRule) ClientIPAddress() string { } func (mr *MatchedRule) MatchedDatas() []types.MatchData { - return *(*[]types.MatchData)(unsafe.Pointer(&mr.MatchedDatas_)) + var matchedDatas []types.MatchData + for _, md := range mr.MatchedDatas_ { + matchedDatas = append(matchedDatas, md) + } + return matchedDatas } func (mr *MatchedRule) MatchedDatasExperimental_() []experimentalTypes.MatchData { diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index 8ec69fa13..85c44516c 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -147,6 +147,8 @@ type Rule struct { // chainedRules containing rules with just PhaseUnknown variables, may potentially // be anticipated. This boolean ensures that it happens withPhaseUnknownVariable bool + + allowedMetadatas []experimentalTypes.DataMetadata } func (r *Rule) ParentID() int { @@ -157,6 +159,10 @@ func (r *Rule) Status() int { return r.DisruptiveStatus } +func (r *Rule) AllowedMetadatas() []experimentalTypes.DataMetadata { + return r.allowedMetadatas +} + const chainLevelZero = 0 // Evaluate will evaluate the current rule for the indicated transaction @@ -242,8 +248,14 @@ func (r *Rule) doEvaluate(logger debuglog.Logger, phase types.RulePhase, tx *Tra vLog = logger.With(debuglog.Str("variable", v.Variable.Name())) } vLog.Debug().Msg("Expanding arguments for rule") - + allowedMetadatas := r.AllowedMetadatas() for i, arg := range values { + if len(allowedMetadatas) > 0 { + argDataMetadataList := arg.Metadata() + if argDataMetadataList.Test(allowedMetadatas) { + continue + } + } args, errs := r.transformArg(arg, i, cache) if len(errs) > 0 { vWarnLog := vLog.Warn() @@ -609,6 +621,15 @@ func (r *Rule) AddTransformation(name string, t plugintypes.Transformation) erro return nil } +func (r *Rule) AddAllowedMetadata(metadataName string) error { + metadata, ok := experimentalTypes.NewValueMetadata(metadataName) + if !ok { + return fmt.Errorf("invalid metadata %q not found", metadataName) + } + r.allowedMetadatas = append(r.allowedMetadatas, metadata) + return nil +} + // ClearTransformations clears all the transformations // it is mostly used by the "none" transformation func (r *Rule) ClearTransformations() { diff --git a/internal/corazawaf/rule_multiphase.go b/internal/corazawaf/rule_multiphase.go index 2c5b0af6d..74c5a69c1 100644 --- a/internal/corazawaf/rule_multiphase.go +++ b/internal/corazawaf/rule_multiphase.go @@ -318,15 +318,18 @@ func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rul *collectiveMatchedValues = append(*collectiveMatchedValues, mr) for _, matchedRule := range tx.matchedRules { - matchedData := matchedRule.MatchedDatas() - matchedDataExp := *(*[]experimentalTypes.MatchData)(unsafe.Pointer(&matchedData)) - if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedDataExp) == matchedChainDepth(*collectiveMatchedValues) { + matchedDatas := matchedRule.MatchedDatas() + var matchedDatasExp []experimentalTypes.MatchData + for _, v := range matchedDatas { + matchedDatasExp = append(matchedDatasExp, *(*experimentalTypes.MatchData)(unsafe.Pointer(&v))) + } + if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedDatasExp) == matchedChainDepth(*collectiveMatchedValues) { // This might be a double match, let's generate the chains that aready matched and the one that just matched // let's see if all the latter already matched. // generateChainMatches generates matched chains based on the matchedValues and populates matchedChains and collectiveMatchedChains variables var matchedChains, collectiveMatchedChains [][]experimentalTypes.MatchData - generateChainMatches(tx, matchedDataExp, 0, nil, &matchedChains) + generateChainMatches(tx, matchedDatasExp, 0, nil, &matchedChains) generateChainMatches(tx, *collectiveMatchedValues, 0, nil, &collectiveMatchedChains) // Check if a newly matched chain (part of collectiveMatchedChain) already matched From f72b47cede4a15aa22876b48a1df2c8a6c699dde Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Thu, 7 Nov 2024 17:53:15 +0000 Subject: [PATCH 06/11] Add tests --- experimental/types/value_metadata.go | 76 ++++++---------------------- internal/corazawaf/rule.go | 4 +- internal/seclang/parser_test.go | 31 ++++++++++++ testing/engine/metatdafilter.go | 60 ++++++++++++++++++++++ 4 files changed, 109 insertions(+), 62 deletions(-) create mode 100644 testing/engine/metatdafilter.go diff --git a/experimental/types/value_metadata.go b/experimental/types/value_metadata.go index 6b98fc54f..847712483 100644 --- a/experimental/types/value_metadata.go +++ b/experimental/types/value_metadata.go @@ -60,39 +60,25 @@ func (v *DataMetadataList) Evaluate(data string) { // we do the analysis only once if v.metadata == nil { v.metadata = make(map[DataMetadata]bool) - for metadataType := range v.metadata { - switch metadataType { - case ValueMetadataNumeric: - v.evaluateNumeric(data) - case ValueMetadataBoolean: - v.evaluateBoolean(data) - case ValueMetadataAlphanumeric: - v.evaluateAlphanumeric(data) - case ValueMetadataAscii: - v.evaluateAscii(data) - case ValueMetadataBase64: - v.evaluateBase64(data) - // case ValueMetadataURI: - // result = result || v.evaluateURI(data) - // case ValueMetadataDomain: - // result = result || v.evaluateDomain(data) - // case ValueMetadataUnicode: - // result = result || v.evaluateUnicode(data) - } - } + v.evaluateNumeric(data) + v.evaluateBoolean(data) + v.evaluateAlphanumeric(data) + v.evaluateAscii(data) + v.evaluateBase64(data) + // v.evaluateURI(data) + // v.evaluateDomain(data) + // v.evaluateUnicode(data) } } func (v *DataMetadataList) evaluateAlphanumeric(data string) bool { - res := true for _, c := range data { if !unicode.IsLetter(c) && !unicode.IsNumber(c) { - res = false + v.metadata[ValueMetadataAlphanumeric] = false break } } - v.metadata[ValueMetadataAlphanumeric] = res - return res + return v.metadata[ValueMetadataAlphanumeric] } func (v *DataMetadataList) evaluateAscii(data string) bool { @@ -136,7 +122,7 @@ func (v *DataMetadataList) evaluateNumeric(data string) bool { } func (v *DataMetadataList) evaluateBoolean(data string) bool { - res := true + res := false if data == "true" || data == "false" { res = true } @@ -144,43 +130,11 @@ func (v *DataMetadataList) evaluateBoolean(data string) bool { return res } -func (v *DataMetadataList) TestNumeric() bool { - return v.metadata[ValueMetadataNumeric] -} - -func (v *DataMetadataList) TestBoolean() bool { - return v.metadata[ValueMetadataBoolean] -} - -func (v *DataMetadataList) TestAlphanumeric() bool { - return v.metadata[ValueMetadataAlphanumeric] -} - -func (v *DataMetadataList) TestAscii() bool { - return v.metadata[ValueMetadataAscii] -} - -func (v *DataMetadataList) TestBase64() bool { - return v.metadata[ValueMetadataBase64] -} - -func (v *DataMetadataList) TestURI() bool { - return v.metadata[ValueMetadataURI] -} - -func (v *DataMetadataList) TestDomain() bool { - return v.metadata[ValueMetadataDomain] -} - -func (v *DataMetadataList) TestUnicode() bool { - return v.metadata[ValueMetadataUnicode] -} - -func (v *DataMetadataList) Test(metadataTypes []DataMetadata) bool { +func (v *DataMetadataList) IsInScope(metadataTypes []DataMetadata) bool { for _, metadataType := range metadataTypes { - if !v.metadata[metadataType] { - return false + if v.metadata[metadataType] { + return true } } - return true + return false } diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index 85c44516c..a7eb90ef9 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -249,10 +249,12 @@ func (r *Rule) doEvaluate(logger debuglog.Logger, phase types.RulePhase, tx *Tra } vLog.Debug().Msg("Expanding arguments for rule") allowedMetadatas := r.AllowedMetadatas() + vLog.Debug().Msg("Allowed metadata for rule" + fmt.Sprint(allowedMetadatas)) for i, arg := range values { if len(allowedMetadatas) > 0 { argDataMetadataList := arg.Metadata() - if argDataMetadataList.Test(allowedMetadatas) { + if !argDataMetadataList.IsInScope(allowedMetadatas) { + vLog.Debug().Msg("Skipping evaluation for " + arg.Key() + " because it is not in scope") continue } } diff --git a/internal/seclang/parser_test.go b/internal/seclang/parser_test.go index b03ce1fc8..81779075e 100644 --- a/internal/seclang/parser_test.go +++ b/internal/seclang/parser_test.go @@ -30,6 +30,37 @@ func TestInterruption(t *testing.T) { } } +func TestAllowedMetadataTags(t *testing.T) { + waf := coraza.NewWAF() + p := NewParser(waf) + if err := p.FromString(` + SecRule ARGS "@rx 123" "id:1,block,log,tag:'metadatafilter/numeric',phase:2" + SecRule ARGS "@rx abc" "id:2,block,log,tag:'metadatafilter/numeric',phase:2" + SecRule ARGS "@rx a5" "id:3,block,log,tag:'metadatafilter/boolean',phase:2" + SecRule ARGS "@rx true" "id:4,block,log,tag:'metadatafilter/boolean,alphanumeric',phase:2" + SecRule ARGS "@rx a5" "id:5,block,log,phase:2" + `); err != nil { + t.Errorf("Could not create from string: %s", err.Error()) + } + tx := waf.NewTransaction() + tx.ProcessURI("http://localhost/test.php?m1=123&m2=abc123&m3=true&m4=a5", "GET", "1.1") + tx.ProcessRequestHeaders() + tx.ProcessRequestBody() + matchedRules := tx.MatchedRules() + if len(matchedRules) != 3 { + t.Errorf("Expected 4 matched rule, got %d", len(matchedRules)) + } + if matchedRules[0].Rule().ID() != 1 { + t.Errorf("Expected matched rule ID 1, got %d", matchedRules[0].Rule().ID()) + } + if matchedRules[1].Rule().ID() != 4 { + t.Errorf("Expected matched rule ID 1, got %d", matchedRules[0].Rule().ID()) + } + if matchedRules[2].Rule().ID() != 5 { + t.Errorf("Expected matched rule ID 1, got %d", matchedRules[0].Rule().ID()) + } +} + func TestDirectivesCaseInsensitive(t *testing.T) { waf := coraza.NewWAF() p := NewParser(waf) diff --git a/testing/engine/metatdafilter.go b/testing/engine/metatdafilter.go new file mode 100644 index 000000000..4d0522403 --- /dev/null +++ b/testing/engine/metatdafilter.go @@ -0,0 +1,60 @@ +// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package engine + +import ( + "github.com/corazawaf/coraza/v3/testing/profile" +) + +var _ = profile.RegisterProfile(profile.Profile{ + Meta: profile.Meta{ + Author: "roshan.piyush", + Description: "Test if the matchers works", + Enabled: true, + Name: "metadatafilter.yaml", + }, + Tests: []profile.Test{ + { + Title: "actions", + Stages: []profile.Stage{ + { + Stage: profile.SubStage{ + Input: profile.StageInput{ + DestAddr: "127.0.0.1", + Method: "GET", + URI: "/test.php?m1=abc&m2=true&m3=xabc123&m4=abc.sjdjd&m5=abc@123", + Headers: map[string]string{ + "content-type": "application/json", + }, + }, + + Output: profile.ExpectedOutput{ + TriggeredRules: []int{ + 26, + 27, + 32, + }, + NonTriggeredRules: []int{ + 28, + 29, + 30, + 31, + }, + }, + }, + }, + }, + }, + }, + Rules: ` +SecDebugLogLevel 9 +SecRule ARGS "@rx 123" "block,id:26, log, phase: 2, tag:'metadatafilter/ascii'" +SecRule ARGS "@rx true" "block,id:27, log, phase: 2, tag:'metadatafilter/boolean,alphanumeric'" +SecRule ARGS "@rx abc" "block,id:28, log, phase: 2, tag:'metadatafilter/boolean'" +SecRule ARGS "@rx abc" "block,id:29, log, phase: 2, tag:'metadatafilter/alphanumeric'" +SecRule ARGS "@rx @" "block,id:30, log, phase: 2, tag:'metadatafilter/boolean,alphanumeric'" +SecRule ARGS "@rx @" "block,id:31, log, phase: 2, tag:'metadatafilter/boolean'" +SecRule ARGS "@rx @" "block,id:32, log, phase: 2, tag:'metadatafilter/ascii'" +`, +}) From df316334e6f3cbe5d5b76772934c437c8ae49ef9 Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Thu, 7 Nov 2024 18:24:40 +0000 Subject: [PATCH 07/11] Resolve merge conflict --- internal/corazawaf/transaction.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/corazawaf/transaction.go b/internal/corazawaf/transaction.go index d140fe51c..453335e8a 100644 --- a/internal/corazawaf/transaction.go +++ b/internal/corazawaf/transaction.go @@ -608,8 +608,8 @@ func (tx *Transaction) GetField(rv ruleVariableParams) []experimentalTypes.Match matches = matches[:filteredCount] if rv.Count { - count := len(filteredMatches) - filteredMatches = []experimentalTypes.MatchData{ + count := len(matches) + matches = []experimentalTypes.MatchData{ &corazarules.MatchData{ Variable_: rv.Variable, Key_: rv.KeyStr, @@ -617,7 +617,7 @@ func (tx *Transaction) GetField(rv ruleVariableParams) []experimentalTypes.Match }, } } - return filteredMatches + return matches } // RemoveRuleTargetByID Removes the VARIABLE:KEY from the rule ID From f498edb39b3b0e5f03b7d0490fec76d8cabcf53d Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Thu, 7 Nov 2024 18:32:10 +0000 Subject: [PATCH 08/11] Lint --- internal/seclang/parser_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/seclang/parser_test.go b/internal/seclang/parser_test.go index 81779075e..43bc3d300 100644 --- a/internal/seclang/parser_test.go +++ b/internal/seclang/parser_test.go @@ -45,7 +45,13 @@ func TestAllowedMetadataTags(t *testing.T) { tx := waf.NewTransaction() tx.ProcessURI("http://localhost/test.php?m1=123&m2=abc123&m3=true&m4=a5", "GET", "1.1") tx.ProcessRequestHeaders() - tx.ProcessRequestBody() + interrupt, err := tx.ProcessRequestBody() + if err != nil { + t.Error(err) + } + if interrupt != nil { + t.Error("Transaction interrupted") + } matchedRules := tx.MatchedRules() if len(matchedRules) != 3 { t.Errorf("Expected 4 matched rule, got %d", len(matchedRules)) From d624f412a15fa72742bb19a2f08c701fc48549be Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Thu, 7 Nov 2024 18:32:10 +0000 Subject: [PATCH 09/11] Lint --- internal/seclang/parser_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/seclang/parser_test.go b/internal/seclang/parser_test.go index 81779075e..43bc3d300 100644 --- a/internal/seclang/parser_test.go +++ b/internal/seclang/parser_test.go @@ -45,7 +45,13 @@ func TestAllowedMetadataTags(t *testing.T) { tx := waf.NewTransaction() tx.ProcessURI("http://localhost/test.php?m1=123&m2=abc123&m3=true&m4=a5", "GET", "1.1") tx.ProcessRequestHeaders() - tx.ProcessRequestBody() + interrupt, err := tx.ProcessRequestBody() + if err != nil { + t.Error(err) + } + if interrupt != nil { + t.Error("Transaction interrupted") + } matchedRules := tx.MatchedRules() if len(matchedRules) != 3 { t.Errorf("Expected 4 matched rule, got %d", len(matchedRules)) From db5d0c680b47fac5fb461eec7e21f306e14cb40b Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Fri, 8 Nov 2024 12:08:01 +0000 Subject: [PATCH 10/11] Remove unsafe pointer --- internal/corazawaf/rule_multiphase.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/internal/corazawaf/rule_multiphase.go b/internal/corazawaf/rule_multiphase.go index 74c5a69c1..18e966a6a 100644 --- a/internal/corazawaf/rule_multiphase.go +++ b/internal/corazawaf/rule_multiphase.go @@ -321,7 +321,15 @@ func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rul matchedDatas := matchedRule.MatchedDatas() var matchedDatasExp []experimentalTypes.MatchData for _, v := range matchedDatas { - matchedDatasExp = append(matchedDatasExp, *(*experimentalTypes.MatchData)(unsafe.Pointer(&v))) + matchedDatasExp = append(matchedDatasExp, &experimentalTypes.MatchData{ + Variable_: v.Variable(), + Key_: v.Key(), + Value_: v.Value(), + Message_: v.Message(), + Data_: v.Data(), + ChainLevel_: v.ChainLevel(), + }) + } } if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedDatasExp) == matchedChainDepth(*collectiveMatchedValues) { // This might be a double match, let's generate the chains that aready matched and the one that just matched From 2ce0c291f0449989222551370a06f80e0b1cf2f6 Mon Sep 17 00:00:00 2001 From: Roshan Piyush Date: Fri, 8 Nov 2024 12:11:38 +0000 Subject: [PATCH 11/11] Lint --- internal/corazawaf/rule_multiphase.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/internal/corazawaf/rule_multiphase.go b/internal/corazawaf/rule_multiphase.go index 18e966a6a..b4da24eec 100644 --- a/internal/corazawaf/rule_multiphase.go +++ b/internal/corazawaf/rule_multiphase.go @@ -5,9 +5,9 @@ package corazawaf import ( "strings" - "unsafe" experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types" + "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -321,15 +321,14 @@ func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rul matchedDatas := matchedRule.MatchedDatas() var matchedDatasExp []experimentalTypes.MatchData for _, v := range matchedDatas { - matchedDatasExp = append(matchedDatasExp, &experimentalTypes.MatchData{ - Variable_: v.Variable(), - Key_: v.Key(), - Value_: v.Value(), - Message_: v.Message(), - Data_: v.Data(), + matchedDatasExp = append(matchedDatasExp, &corazarules.MatchData{ + Variable_: v.Variable(), + Key_: v.Key(), + Value_: v.Value(), + Message_: v.Message(), + Data_: v.Data(), ChainLevel_: v.ChainLevel(), }) - } } if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedDatasExp) == matchedChainDepth(*collectiveMatchedValues) { // This might be a double match, let's generate the chains that aready matched and the one that just matched