Skip to content

Commit

Permalink
Add attributes filter to cascadingfilter
Browse files Browse the repository at this point in the history
  • Loading branch information
pmm-sumo committed Oct 29, 2021
1 parent 09f6a97 commit 0c7ed50
Show file tree
Hide file tree
Showing 8 changed files with 430 additions and 49 deletions.
62 changes: 46 additions & 16 deletions processor/cascadingfilterprocessor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ filtering or additional policy evaluation. This typically happens e.g. when heal

Each of the specified drop rules has several properties:
- `name` (required): identifies the rule
- `numeric_attribute: {key: <name>, min_value: <min_value>, max_value: <max_value>}`: selects span by matching numeric
attribute (either at resource or span level)
- `string_attribute: {key: <name>, values: [<value1>, <value2>]}`: selects span by matching string attribute that is one
of the provided values (either at resource or span level); when `use_regex` (`false` by default) is set to `true`
the provided collection of values is evaluated as regular expressions
- `name_pattern: <regex>`: selects the span if its operation name matches the provided regular expression
- `attributes: <list of attributes>`: list of attribute-level filters (both span-level and resource-level attributes are evaluated).
When several elements are specified, conditions for each of them must be met. Each entry might contain a number of fields:
- `key: <name>`: name of the attribute key
- `values: [<value1>, <value2>]` (default=`empty`): list of string values, when present at least
one of them must be matched
- `use_regex: <use_regex>` (default=`false`): indication whether values provided should be treated as regular expressions
- `ranges: [{min: <min_value>, max: <max_value>}]` (default=`empty`): list of numeric ranges; when present at least
one must be matched


## Accepted trace configuration
Expand All @@ -64,17 +67,25 @@ it selects the traces only if the global limit is not exceeded by other policies

Additionally, each of the policies might have any of the following filtering criteria defined. They are evaluated for
each of the trace spans. If at least one span matching all defined criteria is found, the trace is selected:
- `numeric_attribute: {key: <name>, min_value: <min_value>, max_value: <max_value>}`: selects span by matching numeric
attribute (either at resource or span level)
- `string_attribute: {key: <name>, values: [<value1>, <value2>], use_regex: <use_regex>}`: selects span by matching string attribute that is one
of the provided values (either at resource or span level); when `use_regex` (`false` by default) is set to `true`
the provided collection of values is evaluated as regular expressions
- `attributes: <list of attributes>`: list of attribute-level filters (both span-level and resource-level attributes are evaluated).
When several elements are specified, conditions for each of them must be met. Each entry might contain a number of fields:
- `key: <name>`: name of the attribute key
- `values: [<value1>, <value2>]` (default=`empty`): list of string values, when present at least
one of them must be matched
- `use_regex: <use_regex>` (default=`false`): indication whether values provided should be treated as regular expressions
- `ranges: [{min: <min_value>, max: <max_value>}]` (default=`empty`): list of numeric ranges; when present at least
one must be matched
- `properties: { min_number_of_errors: <number>}`: selects the trace if it has at least provided number of errors
(determined based on the span status field value)
- `properties: { min_number_of_spans: <number>}`: selects the trace if it has at least provided number of spans
- `properties: { min_duration: <duration>}`: selects the span if the duration is greater than or equal to the given value
(use `s` or `ms` as the suffix to indicate unit)
- `properties: { name_pattern: <regex>}`: selects the span if its operation name matches the provided regular expression
- _(deprecated)_ `numeric_attribute: {key: <name>, min_value: <min_value>, max_value: <max_value>}`: selects span by matching numeric
attribute (either at resource or span level)
- _(deprecated)_ `string_attribute: {key: <name>, values: [<value1>, <value2>], use_regex: <use_regex>}`: selects span by matching string attribute that is one
of the provided values (either at resource or span level); when `use_regex` (`false` by default) is set to `true`
the provided collection of values is evaluated as regular expressions

To invert the decision (which is still subject to rate limiting), an additional property can be configured:
- `invert_match: <invert>` (default=`false`): when set to `true`, the opposite decision is selected for the trace. E.g.
Expand Down Expand Up @@ -115,7 +126,11 @@ processors:
- name: remove-all-traces-with-health-span
name_pattern: "health.*"
- name: remove-all-traces-with-healthcheck-service
string_attribute: {key: service.name, values: ["healthcheck/.*"], use_regex: true}
attributes:
- key: service.name
values:
- "healthcheck/.*"
use_regex: true
```
### Filtering out healthchecks and traffic shaping
Expand All @@ -135,7 +150,11 @@ cascadingfilter:
- name: remove-all-traces-with-health-span
name_pattern: "health.*"
- name: remove-all-traces-with-healthcheck-service
string_attribute: {key: service.name, values: [healthcheck]}
attributes:
- key: service.name
values:
- "healthcheck/.*"
use_regex: true
trace_accept_filters:
- name: tail-based-duration
properties:
Expand Down Expand Up @@ -164,8 +183,11 @@ cascadingfilter:
- name: remove-all-traces-with-health-span
name_pattern: "health.*"
- name: remove-all-traces-with-healthcheck-service
string_attribute: {key: service.name, values: [healthcheck.*]}
use_regex: true
attributes:
- key: service.name
values:
- "healthcheck/.*"
use_regex: true
trace_accept_filters:
- name: tail-based-duration
properties:
Expand All @@ -180,8 +202,16 @@ cascadingfilter:
name_pattern: "foo.*"
min_duration: 10s
spans_per_second: 1000 # <- adjust the output traffic level
- name: traces-with-some-attribute
string_attribute: {key: important-key, values: [value1, value2]}
- name: some-service-traces-with-some-attribute
attributes:
- key: service.name
values:
- some-service
- key: important-key
values:
- value1
- value2
use_regex: true
spans_per_second: 300 # <- adjust the output traffic level
- name: everything_else
spans_per_second: -1 # If there's anything left in the budget, it will randomly select remaining traces
Expand Down
22 changes: 22 additions & 0 deletions processor/cascadingfilterprocessor/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ type TraceAcceptCfg struct {
NumericAttributeCfg *NumericAttributeCfg `mapstructure:"numeric_attribute"`
// Configs for string attribute filter sampling policy evaluator.
StringAttributeCfg *StringAttributeCfg `mapstructure:"string_attribute"`
// AttributesCfg keeps generic string/numeric attributes for multiple keys
AttributeCfg []AttributeCfg `mapstructure:"attributes"`
// Configs for properties sampling policy evaluator.
PropertiesCfg PropertiesCfg `mapstructure:"properties"`
// SpansPerSecond specifies the rule budget that should never be exceeded for it
Expand Down Expand Up @@ -70,6 +72,24 @@ type StringAttributeCfg struct {
UseRegex bool `mapstructure:"use_regex"`
}

// AttributeRange describes a single numeric interval, expressed as a lower
// and an upper bound, used as one entry of an attribute's range list.
type AttributeRange struct {
	// MinValue is the lower bound of the interval; it is read from the "min" config key.
	MinValue int64 `mapstructure:"min"`
	// MaxValue is the upper bound of the interval; it is read from the "max" config key.
	MaxValue int64 `mapstructure:"max"`
}

// AttributeCfg is the generic filter specification for one attribute key. It
// can carry string values (optionally interpreted as regular expressions)
// and/or numeric ranges for that key.
type AttributeCfg struct {
	// Key names the attribute (tag) the filter is matched against.
	Key string `mapstructure:"key"`
	// Values lists the accepted attribute values; the attribute is considered
	// a match when its actual value equals any one of them.
	Values []string `mapstructure:"values"`
	// UseRegex (default=false) makes the entries of Values be treated as
	// regular expressions when matching string values.
	UseRegex bool `mapstructure:"use_regex"`
	// Ranges holds the numeric ranges configured for the attribute.
	Ranges []AttributeRange `mapstructure:"ranges"`
}

// TraceRejectCfg holds the configurable settings which drop all traces matching the specified criteria (all of them)
// before further processing
type TraceRejectCfg struct {
Expand All @@ -79,6 +99,8 @@ type TraceRejectCfg struct {
NumericAttributeCfg *NumericAttributeCfg `mapstructure:"numeric_attribute"`
// StringAttributeCfg (config) configs string attribute filter evaluator.
StringAttributeCfg *StringAttributeCfg `mapstructure:"string_attribute"`
// AttributesCfg keeps generic string/numeric attributes for multiple keys
AttributeCfg []AttributeCfg `mapstructure:"attributes"`
// NamePattern (optional) describes a regular expression that must be met by any span operation name
NamePattern *string `mapstructure:"name_pattern"`
}
Expand Down
12 changes: 12 additions & 0 deletions processor/cascadingfilterprocessor/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ func TestLoadConfig(t *testing.T) {
MinDuration: &minDurationValue,
},
},
{
Name: "include-some-attrs",
SpansPerSecond: 500,
AttributeCfg: []cfconfig.AttributeCfg{
{
Key: "foo",
Values: []string{"abc"},
UseRegex: false,
Ranges: nil,
},
},
},
},
})

Expand Down
158 changes: 158 additions & 0 deletions processor/cascadingfilterprocessor/sampling/attrs_filter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sampling

import (
"math"
"regexp"
"testing"
"time"

"go.opentelemetry.io/collector/model/pdata"
"go.uber.org/zap"
)

// newAttrsFilter builds a policyEvaluator for tests: it carries the supplied
// attribute filters, logs to a no-op zap logger, and uses math.MaxInt32 as its
// maxSpansPerSecond value.
func newAttrsFilter(filters []attributeFilter) policyEvaluator {
	evaluator := policyEvaluator{
		attrs:             filters,
		maxSpansPerSecond: math.MaxInt32,
		logger:            zap.NewNop(),
	}
	return evaluator
}

// newAttrFilter assembles an attributeFilter for the given key. Each entry of
// regexValues is compiled with regexp.MustCompile (an invalid expression
// panics) and stored as a pattern; the plain values field is left nil and
// ranges is stored as passed in.
func newAttrFilter(key string, regexValues []string, ranges []attributeRange) attributeFilter {
	// Declared without make so the slice stays nil when regexValues is empty.
	var compiled []*regexp.Regexp
	for i := range regexValues {
		compiled = append(compiled, regexp.MustCompile(regexValues[i]))
	}

	filter := attributeFilter{
		ranges:   ranges,
		patterns: compiled,
		values:   nil,
		key:      key,
	}
	return filter
}

// TestAttributesFilter exercises attribute-based policy evaluation against a
// handful of single-span traces. Each case lists traces that should match the
// evaluator and traces that should not; every trace is evaluated twice, first
// with invertMatch disabled and then with it enabled, where the opposite
// decision is expected.
func TestAttributesFilter(t *testing.T) {
	// Individual per-key filters.
	var (
		fooPatternFilter  = newAttrFilter("foo", []string{"foob.*"}, nil)
		barPatternFilter  = newAttrFilter("bar", []string{"baz.*"}, nil)
		cooAnyValueFilter = newAttrFilter("coo", nil, nil)
		fooRangeFilter    = newAttrFilter("foo", nil, []attributeRange{{minValue: 100, maxValue: 150}})
		fooMixedFilter    = newAttrFilter(
			"foo",
			[]string{"foo.*", "claz.*"},
			[]attributeRange{{minValue: 100, maxValue: 150}, {minValue: 200, maxValue: 250}},
		)
	)

	// Evaluators built from one or more of the filters above.
	var (
		compositeEval  = newAttrsFilter([]attributeFilter{fooMixedFilter, barPatternFilter})
		barEval        = newAttrsFilter([]attributeFilter{barPatternFilter})
		fooRangeEval   = newAttrsFilter([]attributeFilter{fooRangeFilter})
		fooPatternEval = newAttrsFilter([]attributeFilter{fooPatternFilter})
		cooEval        = newAttrsFilter([]attributeFilter{cooAnyValueFilter})
	)

	// Traces with a string "foo" attribute.
	withFoo, withFooAttrs := newTrace()
	withFooAttrs.InsertString("foo", "foobar")

	// Trace with an integer "foo" attribute.
	withNumFoo, withNumFooAttrs := newTrace()
	withNumFooAttrs.InsertInt("foo", 130)

	// Trace with both "foo" and "bar" string attributes.
	withFooBar, withFooBarAttrs := newTrace()
	withFooBarAttrs.InsertString("foo", "foobar")
	withFooBarAttrs.InsertString("bar", "bazbar")

	// Trace with only a "bar" string attribute.
	withBar, withBarAttrs := newTrace()
	withBarAttrs.InsertString("bar", "bazboo")

	// Trace with only a "coo" string attribute.
	withCoo, withCooAttrs := newTrace()
	withCooAttrs.InsertString("coo", "fsdkfjsdkljsda")

	cases := []struct {
		Desc      string
		Evaluator policyEvaluator
		Match     []*TraceData
		DontMatch []*TraceData
	}{
		{
			Desc:      "simple string pattern",
			Evaluator: fooPatternEval,
			Match:     []*TraceData{withFoo, withFooBar},
			DontMatch: []*TraceData{withNumFoo, withBar, withCoo},
		},
		{
			Desc:      "simple numeric ranges",
			Evaluator: fooRangeEval,
			Match:     []*TraceData{withNumFoo},
			DontMatch: []*TraceData{withFoo, withFooBar, withBar, withCoo},
		},
		{
			Desc:      "simple pattern",
			Evaluator: barEval,
			Match:     []*TraceData{withFooBar, withBar},
			DontMatch: []*TraceData{withFoo, withNumFoo},
		},
		{
			Desc:      "composite",
			Evaluator: compositeEval,
			Match:     []*TraceData{withFooBar},
			DontMatch: []*TraceData{withFoo, withNumFoo, withBar, withCoo},
		},
		{
			Desc:      "no pattern, just existence of key",
			Evaluator: cooEval,
			Match:     []*TraceData{withCoo},
			DontMatch: []*TraceData{withFoo, withNumFoo, withFooBar, withBar},
		},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.Desc, func(t *testing.T) {
			// Matching traces: sampled normally, not sampled when inverted.
			for _, matching := range tc.Match {
				tc.Evaluator.invertMatch = false
				evaluate(t, tc.Evaluator, matching, Sampled)
				tc.Evaluator.invertMatch = true
				evaluate(t, tc.Evaluator, matching, NotSampled)
			}
			// Non-matching traces: the exact opposite expectations.
			for _, other := range tc.DontMatch {
				tc.Evaluator.invertMatch = false
				evaluate(t, tc.Evaluator, other, NotSampled)
				tc.Evaluator.invertMatch = true
				evaluate(t, tc.Evaluator, other, Sampled)
			}
		})
	}
}

// newTrace creates a TraceData holding one received batch with a single span
// named "fooname". The span ends at the current time and starts 100000
// nanoseconds earlier. The span's attribute map is returned alongside so
// callers can insert attributes into it.
func newTrace() (*TraceData, pdata.AttributeMap) {
	endNanos := time.Now().UnixNano()
	startNanos := endNanos - 100000

	traces := pdata.NewTraces()
	spans := traces.ResourceSpans().AppendEmpty().InstrumentationLibrarySpans().AppendEmpty().Spans()
	spans.EnsureCapacity(1)

	span := spans.AppendEmpty()
	span.SetName("fooname")
	span.SetStartTimestamp(pdata.Timestamp(startNanos))
	span.SetEndTimestamp(pdata.Timestamp(endNanos))

	data := &TraceData{
		ReceivedBatches: []pdata.Traces{traces},
	}
	return data, span.Attributes()
}
Loading

0 comments on commit 0c7ed50

Please sign in to comment.