From 479838ab281aa1e8c5b9d6a693c259552f4e2cfd Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Mon, 5 Feb 2024 10:12:12 -0500 Subject: [PATCH 1/9] write ottl key value func --- pkg/ottl/ottlfuncs/func_parse_key_value.go | 100 +++++++ .../ottlfuncs/func_parse_key_value_test.go | 270 ++++++++++++++++++ pkg/ottl/ottlfuncs/functions.go | 1 + 3 files changed, 371 insertions(+) create mode 100644 pkg/ottl/ottlfuncs/func_parse_key_value.go create mode 100644 pkg/ottl/ottlfuncs/func_parse_key_value_test.go diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go new file mode 100644 index 000000000000..14aa09f1e472 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -0,0 +1,100 @@ +package ottlfuncs + +import ( + "context" + "fmt" + "strings" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" + "go.opentelemetry.io/collector/pdata/pcommon" +) + +type ParseKeyValueArguments[K any] struct { + Target ottl.StringGetter[K] + Delimiter ottl.Optional[string] + PairDelimiter ottl.Optional[string] +} + +func NewParseKeyValueFactory[K any]() ottl.Factory[K] { + return ottl.NewFactory("ParseKeyValue", &ParseKeyValueArguments[K]{}, createParseKeyValueFunction[K]) +} + +func createParseKeyValueFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) { + args, ok := oArgs.(*ParseKeyValueArguments[K]) + + if !ok { + return nil, fmt.Errorf("ParseKeyValueFactory args must be of type *ParseKeyValueArguments[K]") + } + + return parseKeyValue[K](args.Target, args.Delimiter, args.PairDelimiter) +} + +func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], p ottl.Optional[string]) (ottl.ExprFunc[K], error) { + delimiter := "=" + if !d.IsEmpty() { + delimiter = d.Get() + } + + pair_delimiter := " " + if !p.IsEmpty() { + pair_delimiter = p.Get() + if pair_delimiter == delimiter { + return nil, fmt.Errorf("pair delimiter \"%s\" cannot be equal to delimiter \"%s\"", pair_delimiter, delimiter) + } + } + + return func(ctx context.Context, tCtx K) (any, error) { + source, err := target.Get(ctx, tCtx) + if err != nil { + return nil, err + } + + if source == "" { + return nil, fmt.Errorf("cannot parse from empty target") + } + + parsed := make(map[string]any) + for _, s := range splitString(source, pair_delimiter) { + pair := strings.SplitN(s, delimiter, 2) + if len(pair) != 2 { + return nil, fmt.Errorf("cannot split '%s' into 2 items, got %d", s, len(pair)) + } + key := strings.TrimSpace(strings.Trim(pair[0], "\"'")) + value := strings.TrimSpace(strings.Trim(pair[1], "\"'")) + parsed[key] = value + } + + result := pcommon.NewMap() + err = result.FromRaw(parsed) + return result, err + }, nil +} + +func splitString(input, delimiter string) []string { + var result []string + inQuotes := false + currentPair := "" + delimiterLength := len(delimiter) + + for i := 0; i < len(input); i++ { + if i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter && !inQuotes { + if currentPair == "" { + continue + } + result = append(result, currentPair) + currentPair = "" + i += delimiterLength - 1 + } else if input[i] == '"' || input[i] == '\'' { + inQuotes = !inQuotes + currentPair += string(input[i]) + } else { + currentPair += string(input[i]) + } + } + + if currentPair != "" { + result = append(result, currentPair) + } + + return result +} diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go new file mode 100644 index 000000000000..e7d4d12d4da1 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go @@ -0,0 +1,270 @@ +package ottlfuncs + +import ( + "context" + "testing" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" +) + +func Test_parseKeyValue(t *testing.T) { + tests := []struct { + name string + target ottl.StringGetter[any] + delimiter ottl.Optional[string] + pair_delimiter ottl.Optional[string] + expected map[string]any + }{ + { + name: "simple", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "name=ottl func=key_value", nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "name": "ottl", + "func": "key_value", + }, + }, + { + name: "large", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `name=ottl age=1 job="software engineering" location="grand rapids michigan" src="10.3.3.76" dst=172.217.0.10 protocol=udp sport=57112 port=443 translated_src_ip=96.63.176.3 translated_port=57112`, nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "age": "1", + "port": "443", + "dst": "172.217.0.10", + "job": "software engineering", + "location": "grand rapids michigan", + "name": "ottl", + "protocol": "udp", + "sport": "57112", + "src": "10.3.3.76", + "translated_port": "57112", + "translated_src_ip": "96.63.176.3", + }, + }, + { + name: "double quotes", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `requestClientApplication="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"`, nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "requestClientApplication": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0", + }, + }, + { + name: "single quotes", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "requestClientApplication='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0'", nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "requestClientApplication": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0", + }, + }, + { + name: "double quotes strip leading & trailing spaces", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `name=" ottl " func=" key_ value"`, nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "name": "ottl", + "func": "key_ value", + }, + }, + { + name: "! delimiter && whitespace pair delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return " name!ottl func!key_value hello!world ", nil + }, + }, + delimiter: ottl.NewTestingOptional[string]("!"), + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "name": "ottl", + "func": "key_value", + "hello": "world", + }, + }, + { + name: "!! delimiter && whitespace pair delimiter with newlines", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return ` +name!!ottl +func!!key_value hello!!world `, nil + }, + }, + delimiter: ottl.NewTestingOptional[string]("!!"), + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "name": "ottl", + "func": "key_value", + "hello": "world", + }, + }, + { + name: "!! delimiter && newline pair delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `name!!ottl +func!! key_value another!!pair +hello!!world `, nil + }, + }, + delimiter: ottl.NewTestingOptional[string]("!!"), + pair_delimiter: ottl.NewTestingOptional[string]("\n"), + expected: map[string]any{ + "name": "ottl", + "func": "key_value another!!pair", + "hello": "world", + }, + }, + { + name: "quoted value contains delimiter and pair delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `name="ottl="_func="=key_value"`, nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.NewTestingOptional("_"), + expected: map[string]any{ + "name": "ottl=", + "func": "=key_value", + }, + }, + { + name: "complicated delimiters", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `k1@*v1_!_k2@**v2_!__k3@@*v3__`, nil + }, + }, + delimiter: ottl.NewTestingOptional("@*"), + pair_delimiter: ottl.NewTestingOptional("_!_"), + expected: map[string]any{ + "k1": "v1", + "k2": "*v2", + "_k3@": "v3__", + }, + }, + { + name: "leading and trailing pair delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return " k1=v1 k2==v2 k3=v3= ", nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "k1": "v1", + "k2": "=v2", + "k3": "v3=", + }, + }, + } + + for _, tt := range tests { + t.Run(t.Name(), func(t *testing.T) { + exprFunc, err := parseKeyValue[any](tt.target, tt.delimiter, tt.pair_delimiter) + assert.NoError(t, err) + + result, err := exprFunc(context.Background(), nil) + assert.NoError(t, err) + + actual, ok := result.(pcommon.Map) + assert.True(t, ok) + + expected := pcommon.NewMap() + assert.NoError(t, expected.FromRaw(tt.expected)) + + assert.Equal(t, expected.Len(), actual.Len()) + expected.Range(func(k string, v pcommon.Value) bool { + ev, _ := expected.Get(k) + av, ok := actual.Get(k) + assert.True(t, ok) + assert.Equal(t, ev, av) + return true + }) + }) + } +} + +func Test_parseKeyValue_equal_delimiters(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "", nil + }, + } + delimiter := ottl.NewTestingOptional[string]("=") + pair_delimiter := ottl.NewTestingOptional[string]("=") + _, err := parseKeyValue[any](target, delimiter, pair_delimiter) + assert.Error(t, err) +} + +func Test_parseKeyValue_bad_target(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return 1, nil + }, + } + delimiter := ottl.NewTestingOptional[string]("=") + pair_delimiter := ottl.NewTestingOptional[string]("!") + exprFunc, err := parseKeyValue[any](target, delimiter, pair_delimiter) + assert.NoError(t, err) + _, err = exprFunc(context.Background(), nil) + assert.Error(t, err) +} + +func Test_parseKeyValue_empty_target(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "", nil + }, + } + delimiter := ottl.NewTestingOptional[string]("=") + pair_delimiter := ottl.NewTestingOptional[string]("!") + exprFunc, err := parseKeyValue[any](target, delimiter, pair_delimiter) + assert.NoError(t, err) + _, err = exprFunc(context.Background(), nil) + assert.Error(t, err) +} + +func Test_parseKeyValue_bad_split(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "name=ottl!hello_world", nil + }, + } + delimiter := ottl.NewTestingOptional[string]("=") + pair_delimiter := ottl.NewTestingOptional[string]("!") + exprFunc, err := parseKeyValue[any](target, delimiter, pair_delimiter) + assert.NoError(t, err) + _, err = exprFunc(context.Background(), nil) + assert.Error(t, err) +} diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go index 3e498549a58c..657b88280367 100644 --- a/pkg/ottl/ottlfuncs/functions.go +++ b/pkg/ottl/ottlfuncs/functions.go @@ -57,6 +57,7 @@ func converters[K any]() []ottl.Factory[K] { NewNanosecondsFactory[K](), NewNowFactory[K](), NewParseJSONFactory[K](), + NewParseKeyValueFactory[K](), NewSecondsFactory[K](), NewSHA1Factory[K](), NewSHA256Factory[K](), From e36dbd7cdcf97d9552a1b24ccc6de7eafffa28e7 Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Mon, 5 Feb 2024 11:01:04 -0500 Subject: [PATCH 2/9] add e2e tests, documentation, and changelog entry --- ...pkg-ottl-add-parse-key-value-function.yaml | 27 +++++++++++++++++++ pkg/ottl/e2e/e2e_test.go | 24 +++++++++++++++++ pkg/ottl/ottlfuncs/README.md | 21 +++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 .chloggen/pkg-ottl-add-parse-key-value-function.yaml diff --git a/.chloggen/pkg-ottl-add-parse-key-value-function.yaml b/.chloggen/pkg-ottl-add-parse-key-value-function.yaml new file mode 100644 index 000000000000..c6a5b206dcde --- /dev/null +++ b/.chloggen/pkg-ottl-add-parse-key-value-function.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/ottl + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add `ParseKeyValue` function for parsing key value pairs from a target string + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [30998] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/pkg/ottl/e2e/e2e_test.go b/pkg/ottl/e2e/e2e_test.go index 57bbedb708fe..4235d506d568 100644 --- a/pkg/ottl/e2e/e2e_test.go +++ b/pkg/ottl/e2e/e2e_test.go @@ -437,6 +437,30 @@ func Test_e2e_converters(t *testing.T) { m.PutDouble("id", 1) }, }, + { + statement: `set(attributes["test"], ParseKeyValue("k1=v1 k2=v2"))`, + want: func(tCtx ottllog.TransformContext) { + m := tCtx.GetLogRecord().Attributes().PutEmptyMap("test") + m.PutStr("k1", "v1") + m.PutStr("k2", "v2") + }, + }, + { + statement: `set(attributes["test"], ParseKeyValue("k1!v1_k2!v2", "!", "_"))`, + want: func(tCtx ottllog.TransformContext) { + m := tCtx.GetLogRecord().Attributes().PutEmptyMap("test") + m.PutStr("k1", "v1") + m.PutStr("k2", "v2") + }, + }, + { + statement: `set(attributes["test"], ParseKeyValue("k1!v1_k2!\"v2__!__v2\"", "!", "_"))`, + want: func(tCtx ottllog.TransformContext) { + m := tCtx.GetLogRecord().Attributes().PutEmptyMap("test") + m.PutStr("k1", "v1") + m.PutStr("k2", "v2__!__v2") + }, + }, { statement: `set(attributes["test"], Seconds(Duration("1m")))`, want: func(tCtx ottllog.TransformContext) { diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index f4d6923e96f9..2f2a7ce9a7f8 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -410,6 +410,7 @@ Available Converters: - [Nanoseconds](#nanoseconds) - [Now](#now) - [ParseJSON](#parsejson) +- [ParseKeyValue](#parsekeyvalue) - [Seconds](#seconds) - [SHA1](#sha1) - [SHA256](#sha256) @@ -840,6 +841,26 @@ Examples: - `ParseJSON(body)` +### ParseKeyValue + +`ParseKeyValue(target, Optional[delimiter], Optional[pair_delimiter])` + +The `ParseKeyValue` Converter returns a `pcommon.Map` that is a result of parsing the target string for key value pairs. + +`target` is a Getter that returns a string. `delimiter` is an optional string that is used to split the key and value in a pair, the default is `=`. `pair_delimiter` is an optional string that is used to split key value pairs, the default is white space. + +For example, the following target `"k1=v1 k2=v2 k3=v3"` will use default delimiters and be parsed into the following map: +``` +{ "k1": "v1", "k2": "v2", "k3": "v3" } +``` + +Examples: + +- `ParseKeyValue("k1=v1 k2=v2 k3=v3")` +- `ParseKeyValue("k1!v1_k2!v2_k3!v3", "!", "_")` +- `ParseKeyValue(attributes["pairs"])` + + ### Seconds `Seconds(value)` From 885a56627ae04d35619129c7893d725181142dae Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Mon, 5 Feb 2024 13:59:38 -0500 Subject: [PATCH 3/9] internal feedback --- pkg/ottl/ottlfuncs/func_parse_key_value.go | 52 +++++++++++++------ .../ottlfuncs/func_parse_key_value_test.go | 44 ++++++++++++++++ 2 files changed, 81 insertions(+), 15 deletions(-) diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go index 14aa09f1e472..759c100892a3 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -38,9 +38,10 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], pair_delimiter := " " if !p.IsEmpty() { pair_delimiter = p.Get() - if pair_delimiter == delimiter { - return nil, fmt.Errorf("pair delimiter \"%s\" cannot be equal to delimiter \"%s\"", pair_delimiter, delimiter) - } + } + + if pair_delimiter == delimiter { + return nil, fmt.Errorf("pair delimiter \"%s\" cannot be equal to delimiter \"%s\"", pair_delimiter, delimiter) } return func(ctx context.Context, tCtx K) (any, error) { @@ -53,11 +54,16 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], return nil, fmt.Errorf("cannot parse from empty target") } + pairs, err := splitString(source, pair_delimiter) + if err != nil { + return nil, fmt.Errorf("splitting pairs failed: %w", err) + } + parsed := make(map[string]any) - for _, s := range splitString(source, pair_delimiter) { - pair := strings.SplitN(s, delimiter, 2) + for _, p := range pairs { + pair := strings.SplitN(p, delimiter, 2) if len(pair) != 2 { - return nil, fmt.Errorf("cannot split '%s' into 2 items, got %d", s, len(pair)) + return nil, fmt.Errorf("cannot split '%s' into 2 items, got %d", p, len(pair)) } key := strings.TrimSpace(strings.Trim(pair[0], "\"'")) value := strings.TrimSpace(strings.Trim(pair[1], "\"'")) @@ -70,31 +76,47 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], }, nil } -func splitString(input, delimiter string) []string { +// splitString will split the input on the delimiter and return the resulting slice. +// `strings.Split` is not used because it does not respect quotes and will split if the delimiter appears in a quoted value +func splitString(input, delimiter string) ([]string, error) { var result []string + quoteType := "" inQuotes := false currentPair := "" delimiterLength := len(delimiter) - for i := 0; i < len(input); i++ { - if i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter && !inQuotes { + i := 0 + for i < len(input) { + if !inQuotes && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter { if currentPair == "" { + i++ continue } result = append(result, currentPair) currentPair = "" - i += delimiterLength - 1 + i += delimiterLength + continue } else if input[i] == '"' || input[i] == '\'' { - inQuotes = !inQuotes - currentPair += string(input[i]) - } else { - currentPair += string(input[i]) + if inQuotes { + if quoteType == string(input[i]) { + inQuotes = !inQuotes + } + } else { + quoteType = string(input[i]) + inQuotes = !inQuotes + } } + currentPair += string(input[i]) + i++ + } + + if inQuotes { + return nil, fmt.Errorf("never reached end of a quoted value, failed to parse input") } if currentPair != "" { result = append(result, currentPair) } - return result + return result, nil } diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go index e7d4d12d4da1..f153063be311 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go @@ -54,6 +54,20 @@ func Test_parseKeyValue(t *testing.T) { "translated_src_ip": "96.63.176.3", }, }, + { + name: "embedded double quotes in single quoted value", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `a=b c='this is a "co ol" value'`, nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "a": "b", + "c": "this is a \"co ol\" value", + }, + }, { name: "double quotes", target: ottl.StandardStringGetter[any]{ @@ -187,6 +201,20 @@ hello!!world `, nil "k3": "v3=", }, }, + { + name: "double quotes", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `a=b c='this is a "cool" value'`, nil + }, + }, + delimiter: ottl.Optional[string]{}, + pair_delimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "a": "b", + "c": "this is a \"cool\" value", + }, + }, } for _, tt := range tests { @@ -225,6 +253,10 @@ func Test_parseKeyValue_equal_delimiters(t *testing.T) { pair_delimiter := ottl.NewTestingOptional[string]("=") _, err := parseKeyValue[any](target, delimiter, pair_delimiter) assert.Error(t, err) + + delimiter = ottl.NewTestingOptional[string](" ") + _, err = parseKeyValue[any](target, delimiter, ottl.Optional[string]{}) + assert.Error(t, err) } func Test_parseKeyValue_bad_target(t *testing.T) { @@ -268,3 +300,15 @@ func Test_parseKeyValue_bad_split(t *testing.T) { _, err = exprFunc(context.Background(), nil) assert.Error(t, err) } + +func Test_parseKeyValue_mismatch_quotes(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return `k1=v1 k2='v2"`, nil + }, + } + exprFunc, err := parseKeyValue[any](target, ottl.Optional[string]{}, ottl.Optional[string]{}) + assert.NoError(t, err) + _, err = exprFunc(context.Background(), nil) + assert.Error(t, err) +} From 967289ccbe8a2ba2e63036f994cef28f2fc82e6d Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Mon, 5 Feb 2024 14:45:22 -0500 Subject: [PATCH 4/9] name changes & remove inQuotes --- pkg/ottl/ottlfuncs/func_parse_key_value.go | 34 ++++++++++------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go index 759c100892a3..c1ac1d06d25f 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -35,13 +35,13 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], delimiter = d.Get() } - pair_delimiter := " " + pairDelimiter := " " if !p.IsEmpty() { - pair_delimiter = p.Get() + pairDelimiter = p.Get() } - if pair_delimiter == delimiter { - return nil, fmt.Errorf("pair delimiter \"%s\" cannot be equal to delimiter \"%s\"", pair_delimiter, delimiter) + if pairDelimiter == delimiter { + return nil, fmt.Errorf("pair delimiter \"%s\" cannot be equal to delimiter \"%s\"", pairDelimiter, delimiter) } return func(ctx context.Context, tCtx K) (any, error) { @@ -54,7 +54,7 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], return nil, fmt.Errorf("cannot parse from empty target") } - pairs, err := splitString(source, pair_delimiter) + pairs, err := splitPairs(source, pairDelimiter) if err != nil { return nil, fmt.Errorf("splitting pairs failed: %w", err) } @@ -76,18 +76,17 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], }, nil } -// splitString will split the input on the delimiter and return the resulting slice. +// splitPairs will split the input on the pairDelimiter and return the resulting slice. // `strings.Split` is not used because it does not respect quotes and will split if the delimiter appears in a quoted value -func splitString(input, delimiter string) ([]string, error) { +func splitPairs(input, pairDelimiter string) ([]string, error) { var result []string - quoteType := "" - inQuotes := false currentPair := "" - delimiterLength := len(delimiter) + delimiterLength := len(pairDelimiter) + quoteChar := "" // "" means we are not in quotes i := 0 for i < len(input) { - if !inQuotes && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter { + if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == pairDelimiter { if currentPair == "" { i++ continue @@ -97,21 +96,20 @@ func splitString(input, delimiter string) ([]string, error) { i += delimiterLength continue } else if input[i] == '"' || input[i] == '\'' { - if inQuotes { - if quoteType == string(input[i]) { - inQuotes = !inQuotes + if quoteChar != "" { + if quoteChar == string(input[i]) { + quoteChar = "" } } else { - quoteType = string(input[i]) - inQuotes = !inQuotes + quoteChar = string(input[i]) } } currentPair += string(input[i]) i++ } - if inQuotes { - return nil, fmt.Errorf("never reached end of a quoted value, failed to parse input") + if quoteChar != "" { + return nil, fmt.Errorf("never reached end of a quoted value") } if currentPair != "" { From 2ecf32deedb5114ff4a5620582fd84dcdc39d441 Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Tue, 6 Feb 2024 15:36:46 -0500 Subject: [PATCH 5/9] update parsing to reflect changes made to stanza keyvalue parser --- pkg/ottl/ottlfuncs/func_parse_key_value.go | 40 +++++++++---------- .../ottlfuncs/func_parse_key_value_test.go | 6 +-- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go index c1ac1d06d25f..188225bb8a86 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -65,8 +65,8 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], if len(pair) != 2 { return nil, fmt.Errorf("cannot split '%s' into 2 items, got %d", p, len(pair)) } - key := strings.TrimSpace(strings.Trim(pair[0], "\"'")) - value := strings.TrimSpace(strings.Trim(pair[1], "\"'")) + key := strings.TrimSpace(pair[0]) + value := strings.TrimSpace(pair[1]) parsed[key] = value } @@ -84,36 +84,34 @@ func splitPairs(input, pairDelimiter string) ([]string, error) { delimiterLength := len(pairDelimiter) quoteChar := "" // "" means we are not in quotes - i := 0 - for i < len(input) { - if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == pairDelimiter { - if currentPair == "" { - i++ + for i := 0; i < len(input); i++ { + if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == pairDelimiter { // delimiter + if currentPair == "" { // leading || trailing delimiter; ignore continue } result = append(result, currentPair) currentPair = "" - i += delimiterLength + i += delimiterLength - 1 + continue + } + + if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote + quoteChar = string(input[i]) continue - } else if input[i] == '"' || input[i] == '\'' { - if quoteChar != "" { - if quoteChar == string(input[i]) { - quoteChar = "" - } - } else { - quoteChar = string(input[i]) - } } + if string(input[i]) == quoteChar { // end of quote + quoteChar = "" + continue + } + currentPair += string(input[i]) - i++ } - if quoteChar != "" { + if quoteChar != "" { // check for closed quotes return nil, fmt.Errorf("never reached end of a quoted value") } - - if currentPair != "" { - result = append(result, currentPair) + if currentPair != "" { // avoid adding empty value bc of a trailing delimiter + return append(result, currentPair), nil } return result, nil diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go index f153063be311..6b1200fae034 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go @@ -202,17 +202,17 @@ hello!!world `, nil }, }, { - name: "double quotes", + name: " embedded double quotes end single quoted value", target: ottl.StandardStringGetter[any]{ Getter: func(ctx context.Context, tCtx any) (any, error) { - return `a=b c='this is a "cool" value'`, nil + return `a=b c='this is a "co ol"'`, nil }, }, delimiter: ottl.Optional[string]{}, pair_delimiter: ottl.Optional[string]{}, expected: map[string]any{ "a": "b", - "c": "this is a \"cool\" value", + "c": "this is a \"co ol\"", }, }, } From 6b0a26d6806fa5d4fd6798d2dff3c66c70d3aed5 Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Thu, 8 Feb 2024 08:28:57 -0500 Subject: [PATCH 6/9] add license --- pkg/ottl/ottlfuncs/func_parse_key_value.go | 3 +++ pkg/ottl/ottlfuncs/func_parse_key_value_test.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go index 188225bb8a86..44327060e664 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -1,3 +1,6 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package ottlfuncs import ( diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go index 6b1200fae034..b6efa35abb99 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go @@ -1,3 +1,6 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package ottlfuncs import ( From 0dbc0d92cdc1ba5399a04ab7abdb0e9d71d17832 Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Mon, 12 Feb 2024 14:06:40 -0500 Subject: [PATCH 7/9] put pair parsing into internal pkg, add some more unit tests, update fmt specifier for errs --- internal/coreinternal/parseutils/doc.go | 4 + .../coreinternal/parseutils/package_test.go | 14 ++ internal/coreinternal/parseutils/parser.go | 49 +++++ .../coreinternal/parseutils/parser_test.go | 186 ++++++++++++++++++ pkg/ottl/ottlfuncs/README.md | 2 +- pkg/ottl/ottlfuncs/func_parse_key_value.go | 54 +---- .../ottlfuncs/func_parse_key_value_test.go | 143 ++++++++++---- 7 files changed, 361 insertions(+), 91 deletions(-) create mode 100644 internal/coreinternal/parseutils/doc.go create mode 100644 internal/coreinternal/parseutils/package_test.go create mode 100644 internal/coreinternal/parseutils/parser.go create mode 100644 internal/coreinternal/parseutils/parser_test.go diff --git a/internal/coreinternal/parseutils/doc.go b/internal/coreinternal/parseutils/doc.go new file mode 100644 index 000000000000..f63f940df0a8 --- /dev/null +++ b/internal/coreinternal/parseutils/doc.go @@ -0,0 +1,4 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package parseutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils" diff --git a/internal/coreinternal/parseutils/package_test.go b/internal/coreinternal/parseutils/package_test.go new file mode 100644 index 000000000000..20e63515f3af --- /dev/null +++ b/internal/coreinternal/parseutils/package_test.go @@ -0,0 +1,14 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package parseutils + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/internal/coreinternal/parseutils/parser.go b/internal/coreinternal/parseutils/parser.go new file mode 100644 index 000000000000..03dfc7096677 --- /dev/null +++ b/internal/coreinternal/parseutils/parser.go @@ -0,0 +1,49 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package parseutils + +import "fmt" + +// SplitString will split the input on the delimiter and return the resulting slice while respecting quotes. Outer quotes are stripped. +// Use in place of `strings.Split` when quotes need to be respected. +// Requires `delimiter` not be an empty string +func SplitString(input, delimiter string) ([]string, error) { + var result []string + current := "" + delimiterLength := len(delimiter) + quoteChar := "" // "" means we are not in quotes + + for i := 0; i < len(input); i++ { + if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter { // delimiter + if current == "" { // leading || trailing delimiter; ignore + i += delimiterLength - 1 + continue + } + result = append(result, current) + current = "" + i += delimiterLength - 1 + continue + } + + if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote + quoteChar = string(input[i]) + continue + } + if string(input[i]) == quoteChar { // end of quote + quoteChar = "" + continue + } + + current += string(input[i]) + } + + if quoteChar != "" { // check for closed quotes + return nil, fmt.Errorf("never reached the end of a quoted value") + } + if current != "" { // avoid adding empty value bc of a trailing delimiter + return append(result, current), nil + } + + return result, nil +} diff --git a/internal/coreinternal/parseutils/parser_test.go b/internal/coreinternal/parseutils/parser_test.go new file mode 100644 index 000000000000..d78ac812604f --- /dev/null +++ b/internal/coreinternal/parseutils/parser_test.go @@ -0,0 +1,186 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package parseutils + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_SplitString(t *testing.T) { + testCases := []struct { + name string + input string + delimiter string + expected []string + expectedErr error + }{ + { + name: "simple", + input: "a b c", + delimiter: " ", + expected: []string{ + "a", + "b", + "c", + }, + }, + { + name: "single quotes", + input: "a 'b c d'", + delimiter: " ", + expected: []string{ + "a", + "b c d", + }, + }, + { + name: "double quotes", + input: `a " b c " d`, + delimiter: " ", + expected: []string{ + "a", + " b c ", + "d", + }, + }, + { + name: "multi-char delimiter", + input: "abc!@! def !@! g", + delimiter: "!@!", + expected: []string{ + "abc", + " def ", + " g", + }, + }, + { + name: "leading and trailing delimiters", + input: " name=ottl func=key_value hello=world ", + delimiter: " ", + expected: []string{ + "name=ottl", + "func=key_value", + "hello=world", + }, + }, + { + name: "embedded double quotes in single quoted value", + input: `ab c='this is a "co ol" value'`, + delimiter: " ", + expected: []string{ + "ab", + `c=this is a "co ol" value`, + }, + }, + { + name: "embedded double quotes end single quoted value", + input: `ab c='this is a "co ol"'`, + delimiter: " ", + expected: []string{ + "ab", + `c=this is a "co ol"`, + }, + }, + { + name: "quoted values include whitespace", + input: `name=" ottl " func=" key_ value"`, + delimiter: " ", + expected: []string{ + "name= ottl ", + "func= key_ value", + }, + }, + { + name: "delimiter longer than input", + input: "abc", + delimiter: "aaaa", + expected: []string{ + "abc", + }, + }, + { + name: "delimiter not found", + input: "a b c", + delimiter: "!", + expected: []string{ + "a b c", + }, + }, + { + name: "newlines in input", + input: `a +b +c`, + delimiter: " ", + expected: []string{ + "a\nb\nc", + }, + }, + { + name: "newline delimiter", + input: `a b c +d e f +g +h`, + delimiter: "\n", + expected: []string{ + "a b c", + "d e f", + "g ", + "h", + }, + }, + { + name: "empty input", + input: "", + delimiter: " ", + expected: nil, + }, + { + name: "equal input and delimiter", + input: "abc", + delimiter: "abc", + expected: nil, + }, + { + name: "unclosed quotes", + input: "a 'b c", + delimiter: " ", + expectedErr: fmt.Errorf("never reached the end of a quoted value"), + }, + { + name: "mismatched quotes", + input: `a 'b c' "d '`, + delimiter: " ", + expectedErr: fmt.Errorf("never reached the end of a quoted value"), + }, + { + name: "tab delimiters", + input: "a b c", + delimiter: "\t", + expected: []string{ + "a", + "b", + "c", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := SplitString(tc.input, tc.delimiter) + + if tc.expectedErr == nil { + assert.NoError(t, err) + assert.Equal(t, tc.expected, result) + } else { + assert.EqualError(t, err, tc.expectedErr.Error()) + assert.Nil(t, result) + } + }) + } +} diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 2f2a7ce9a7f8..d99125af2868 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -847,7 +847,7 @@ Examples: The `ParseKeyValue` Converter returns a `pcommon.Map` that is a result of parsing the target string for key value pairs. -`target` is a Getter that returns a string. `delimiter` is an optional string that is used to split the key and value in a pair, the default is `=`. `pair_delimiter` is an optional string that is used to split key value pairs, the default is white space. +`target` is a Getter that returns a string. `delimiter` is an optional string that is used to split the key and value in a pair, the default is `=`. `pair_delimiter` is an optional string that is used to split key value pairs, the default is a single space (` `). For example, the following target `"k1=v1 k2=v2 k3=v3"` will use default delimiters and be parsed into the following map: ``` diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go index 44327060e664..21eff3b2e014 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -8,6 +8,7 @@ import ( "fmt" "strings" + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" "go.opentelemetry.io/collector/pdata/pcommon" ) @@ -34,17 +35,17 @@ func createParseKeyValueFunction[K any](_ ottl.FunctionContext, oArgs ottl.Argum func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], p ottl.Optional[string]) (ottl.ExprFunc[K], error) { delimiter := "=" - if !d.IsEmpty() { + if !d.IsEmpty() && d.Get() != "" { delimiter = d.Get() } pairDelimiter := " " - if !p.IsEmpty() { + if !p.IsEmpty() && p.Get() != "" { pairDelimiter = p.Get() } if pairDelimiter == delimiter { - return nil, fmt.Errorf("pair delimiter \"%s\" cannot be equal to delimiter \"%s\"", pairDelimiter, delimiter) + return nil, fmt.Errorf("pair delimiter %q cannot be equal to delimiter %q", pairDelimiter, delimiter) } return func(ctx context.Context, tCtx K) (any, error) { @@ -57,16 +58,16 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], return nil, fmt.Errorf("cannot parse from empty target") } - pairs, err := splitPairs(source, pairDelimiter) + pairs, err := parseutils.SplitString(source, pairDelimiter) if err != nil { - return nil, fmt.Errorf("splitting pairs failed: %w", err) + return nil, fmt.Errorf("splitting source %q into pairs failed: %w", source, err) } parsed := make(map[string]any) for _, p := range pairs { pair := strings.SplitN(p, delimiter, 2) if len(pair) != 2 { - return nil, fmt.Errorf("cannot split '%s' into 2 items, got %d", p, len(pair)) + return nil, fmt.Errorf("cannot split %q into 2 items, got %d item(s)", p, len(pair)) } key := strings.TrimSpace(pair[0]) value := strings.TrimSpace(pair[1]) @@ -78,44 +79,3 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], return result, err }, nil } - -// splitPairs will split the input on the pairDelimiter and return the resulting slice. -// `strings.Split` is not used because it does not respect quotes and will split if the delimiter appears in a quoted value -func splitPairs(input, pairDelimiter string) ([]string, error) { - var result []string - currentPair := "" - delimiterLength := len(pairDelimiter) - quoteChar := "" // "" means we are not in quotes - - for i := 0; i < len(input); i++ { - if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == pairDelimiter { // delimiter - if currentPair == "" { // leading || trailing delimiter; ignore - continue - } - result = append(result, currentPair) - currentPair = "" - i += delimiterLength - 1 - continue - } - - if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote - quoteChar = string(input[i]) - continue - } - if string(input[i]) == quoteChar { // end of quote - quoteChar = "" - continue - } - - currentPair += string(input[i]) - } - - if quoteChar != "" { // check for closed quotes - return nil, fmt.Errorf("never reached end of a quoted value") - } - if currentPair != "" { // avoid adding empty value bc of a trailing delimiter - return append(result, currentPair), nil - } - - return result, nil -} diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go index b6efa35abb99..02b67ee79da2 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go @@ -14,11 +14,11 @@ import ( func Test_parseKeyValue(t *testing.T) { tests := []struct { - name string - target ottl.StringGetter[any] - delimiter ottl.Optional[string] - pair_delimiter ottl.Optional[string] - expected map[string]any + name string + target ottl.StringGetter[any] + delimiter ottl.Optional[string] + pairDelimiter ottl.Optional[string] + expected map[string]any }{ { name: "simple", @@ -27,8 +27,8 @@ func Test_parseKeyValue(t *testing.T) { return "name=ottl func=key_value", nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "name": "ottl", "func": "key_value", @@ -41,8 +41,8 @@ func Test_parseKeyValue(t *testing.T) { return `name=ottl age=1 job="software engineering" location="grand rapids michigan" src="10.3.3.76" dst=172.217.0.10 protocol=udp sport=57112 port=443 translated_src_ip=96.63.176.3 translated_port=57112`, nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "age": "1", "port": "443", @@ -64,8 +64,8 @@ func Test_parseKeyValue(t *testing.T) { return `a=b c='this is a "co ol" value'`, nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "a": "b", "c": "this is a \"co ol\" value", @@ -78,8 +78,8 @@ func Test_parseKeyValue(t *testing.T) { return `requestClientApplication="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"`, nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "requestClientApplication": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0", }, @@ -91,8 +91,8 @@ func Test_parseKeyValue(t *testing.T) { return "requestClientApplication='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0'", nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "requestClientApplication": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0", }, @@ -104,8 +104,8 @@ func Test_parseKeyValue(t *testing.T) { return `name=" ottl " func=" key_ value"`, nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "name": "ottl", "func": "key_ value", @@ -118,8 +118,8 @@ func Test_parseKeyValue(t *testing.T) { return " name!ottl func!key_value hello!world ", nil }, }, - delimiter: ottl.NewTestingOptional[string]("!"), - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.NewTestingOptional[string]("!"), + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "name": "ottl", "func": "key_value", @@ -135,8 +135,8 @@ name!!ottl func!!key_value hello!!world `, nil }, }, - delimiter: ottl.NewTestingOptional[string]("!!"), - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.NewTestingOptional[string]("!!"), + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "name": "ottl", "func": "key_value", @@ -152,8 +152,8 @@ func!! key_value another!!pair hello!!world `, nil }, }, - delimiter: ottl.NewTestingOptional[string]("!!"), - pair_delimiter: ottl.NewTestingOptional[string]("\n"), + delimiter: ottl.NewTestingOptional[string]("!!"), + pairDelimiter: ottl.NewTestingOptional[string]("\n"), expected: map[string]any{ "name": "ottl", "func": "key_value another!!pair", @@ -167,8 +167,8 @@ hello!!world `, nil return `name="ottl="_func="=key_value"`, nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.NewTestingOptional("_"), + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.NewTestingOptional("_"), expected: map[string]any{ "name": "ottl=", "func": "=key_value", @@ -181,8 +181,8 @@ hello!!world `, nil return `k1@*v1_!_k2@**v2_!__k3@@*v3__`, nil }, }, - delimiter: ottl.NewTestingOptional("@*"), - pair_delimiter: ottl.NewTestingOptional("_!_"), + delimiter: ottl.NewTestingOptional("@*"), + pairDelimiter: ottl.NewTestingOptional("_!_"), expected: map[string]any{ "k1": "v1", "k2": "*v2", @@ -196,8 +196,8 @@ hello!!world `, nil return " k1=v1 k2==v2 k3=v3= ", nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "k1": "v1", "k2": "=v2", @@ -205,24 +205,66 @@ hello!!world `, nil }, }, { - name: " embedded double quotes end single quoted value", + name: "embedded double quotes end single quoted value", target: ottl.StandardStringGetter[any]{ Getter: func(ctx context.Context, tCtx any) (any, error) { return `a=b c='this is a "co ol"'`, nil }, }, - delimiter: ottl.Optional[string]{}, - pair_delimiter: ottl.Optional[string]{}, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, expected: map[string]any{ "a": "b", "c": "this is a \"co ol\"", }, }, + { + name: "more quotes", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "a=b c=d'='", nil + }, + }, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.Optional[string]{}, + expected: map[string]any{ + "a": "b", + "c": "d=", + }, + }, + + { + name: "long pair delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "a=b c=d", nil + }, + }, + delimiter: ottl.Optional[string]{}, + pairDelimiter: ottl.NewTestingOptional("aaaaaaaaaaaaaaaa"), + expected: map[string]any{ + "a": "b c=d", // occurs because `SplitString()` returns original string and `strings.SplitN` with N=2 will split on just the first instance of delimiter("=") + }, + }, + { + name: "empty delimiters", + target: ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "a=b c=d", nil + }, + }, + delimiter: ottl.NewTestingOptional(""), + pairDelimiter: ottl.NewTestingOptional(""), + expected: map[string]any{ + "a": "b", + "c": "d", + }, + }, } for _, tt := range tests { - t.Run(t.Name(), func(t *testing.T) { - exprFunc, err := parseKeyValue[any](tt.target, tt.delimiter, tt.pair_delimiter) + t.Run(tt.name, func(t *testing.T) { + exprFunc, err := parseKeyValue[any](tt.target, tt.delimiter, tt.pairDelimiter) assert.NoError(t, err) result, err := exprFunc(context.Background(), nil) @@ -253,8 +295,8 @@ func Test_parseKeyValue_equal_delimiters(t *testing.T) { }, } delimiter := ottl.NewTestingOptional[string]("=") - pair_delimiter := ottl.NewTestingOptional[string]("=") - _, err := parseKeyValue[any](target, delimiter, pair_delimiter) + pairDelimiter := ottl.NewTestingOptional[string]("=") + _, err := parseKeyValue[any](target, delimiter, pairDelimiter) assert.Error(t, err) delimiter = ottl.NewTestingOptional[string](" ") @@ -269,8 +311,8 @@ func Test_parseKeyValue_bad_target(t *testing.T) { }, } delimiter := ottl.NewTestingOptional[string]("=") - pair_delimiter := ottl.NewTestingOptional[string]("!") - exprFunc, err := parseKeyValue[any](target, delimiter, pair_delimiter) + pairDelimiter := ottl.NewTestingOptional[string]("!") + exprFunc, err := parseKeyValue[any](target, delimiter, pairDelimiter) assert.NoError(t, err) _, err = exprFunc(context.Background(), nil) assert.Error(t, err) @@ -283,8 +325,8 @@ func Test_parseKeyValue_empty_target(t *testing.T) { }, } delimiter := ottl.NewTestingOptional[string]("=") - pair_delimiter := ottl.NewTestingOptional[string]("!") - exprFunc, err := parseKeyValue[any](target, delimiter, pair_delimiter) + pairDelimiter := ottl.NewTestingOptional[string]("!") + exprFunc, err := parseKeyValue[any](target, delimiter, pairDelimiter) assert.NoError(t, err) _, err = exprFunc(context.Background(), nil) assert.Error(t, err) @@ -297,11 +339,11 @@ func Test_parseKeyValue_bad_split(t *testing.T) { }, } delimiter := ottl.NewTestingOptional[string]("=") - pair_delimiter := ottl.NewTestingOptional[string]("!") - exprFunc, err := parseKeyValue[any](target, delimiter, pair_delimiter) + pairDelimiter := ottl.NewTestingOptional[string]("!") + exprFunc, err := parseKeyValue[any](target, delimiter, pairDelimiter) assert.NoError(t, err) _, err = exprFunc(context.Background(), nil) - assert.Error(t, err) + assert.ErrorContains(t, err, "cannot split \"hello_world\" into 2 items, got 1 item(s)") } func Test_parseKeyValue_mismatch_quotes(t *testing.T) { @@ -315,3 +357,18 @@ func Test_parseKeyValue_mismatch_quotes(t *testing.T) { _, err = exprFunc(context.Background(), nil) assert.Error(t, err) } + +func Test_parseKeyValue_bad_delimiter(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "a=b c=d", nil + }, + } + + // covers too long of a delimiter && delimiter not found + delimiter := ottl.NewTestingOptional[string]("=============") + exprFunc, err := parseKeyValue[any](target, delimiter, ottl.Optional[string]{}) + assert.NoError(t, err) + _, err = exprFunc(context.Background(), nil) + assert.ErrorContains(t, err, "cannot split \"a=b\" into 2 items, got 1 item(s)") +} From ca573d970112d10f39e2fc49f991731aea39fb63 Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Tue, 13 Feb 2024 07:44:08 -0500 Subject: [PATCH 8/9] update how empty string is handled --- pkg/ottl/ottlfuncs/func_parse_key_value.go | 13 ++++++-- .../ottlfuncs/func_parse_key_value_test.go | 32 ++++++++++--------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go index 21eff3b2e014..71a6c6eab7df 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -8,9 +8,10 @@ import ( "fmt" "strings" + "go.opentelemetry.io/collector/pdata/pcommon" + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" - "go.opentelemetry.io/collector/pdata/pcommon" ) type ParseKeyValueArguments[K any] struct { @@ -35,12 +36,18 @@ func createParseKeyValueFunction[K any](_ ottl.FunctionContext, oArgs ottl.Argum func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], p ottl.Optional[string]) (ottl.ExprFunc[K], error) { delimiter := "=" - if !d.IsEmpty() && d.Get() != "" { + if !d.IsEmpty() { + if d.Get() == "" { + return nil, fmt.Errorf("delimiter cannot be set to an empty string") + } delimiter = d.Get() } pairDelimiter := " " - if !p.IsEmpty() && p.Get() != "" { + if !p.IsEmpty() { + if p.Get() == "" { + return nil, fmt.Errorf("pair delimiter cannot be set to an empty string") + } pairDelimiter = p.Get() } diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go index 02b67ee79da2..77ad9a8c9431 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go @@ -7,9 +7,10 @@ import ( "context" "testing" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" ) func Test_parseKeyValue(t *testing.T) { @@ -246,20 +247,6 @@ hello!!world `, nil "a": "b c=d", // occurs because `SplitString()` returns original string and `strings.SplitN` with N=2 will split on just the first instance of delimiter("=") }, }, - { - name: "empty delimiters", - target: ottl.StandardStringGetter[any]{ - Getter: func(ctx context.Context, tCtx any) (any, error) { - return "a=b c=d", nil - }, - }, - delimiter: ottl.NewTestingOptional(""), - pairDelimiter: ottl.NewTestingOptional(""), - expected: map[string]any{ - "a": "b", - "c": "d", - }, - }, } for _, tt := range tests { @@ -372,3 +359,18 @@ func Test_parseKeyValue_bad_delimiter(t *testing.T) { _, err = exprFunc(context.Background(), nil) assert.ErrorContains(t, err, "cannot split \"a=b\" into 2 items, got 1 item(s)") } + +func Test_parseKeyValue_empty_delimiters(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (any, error) { + return "a=b c=d", nil + }, + } + delimiter := ottl.NewTestingOptional[string]("") + + _, err := parseKeyValue[any](target, delimiter, ottl.Optional[string]{}) + assert.ErrorContains(t, err, "delimiter cannot be set to an empty string") + + _, err = parseKeyValue[any](target, ottl.Optional[string]{}, delimiter) + assert.ErrorContains(t, err, "pair delimiter cannot be set to an empty string") +} From e1bbb38b026879ff2c2cd2abc4db6e83b2a14844 Mon Sep 17 00:00:00 2001 From: Dakota Paasman Date: Thu, 15 Feb 2024 07:27:42 -0500 Subject: [PATCH 9/9] move key-value split into parseutils --- internal/coreinternal/parseutils/parser.go | 28 +++++- .../coreinternal/parseutils/parser_test.go | 90 +++++++++++++++++++ pkg/ottl/ottlfuncs/README.md | 2 +- pkg/ottl/ottlfuncs/func_parse_key_value.go | 15 +--- .../ottlfuncs/func_parse_key_value_test.go | 4 +- 5 files changed, 123 insertions(+), 16 deletions(-) diff --git a/internal/coreinternal/parseutils/parser.go b/internal/coreinternal/parseutils/parser.go index 03dfc7096677..2758161ec565 100644 --- a/internal/coreinternal/parseutils/parser.go +++ b/internal/coreinternal/parseutils/parser.go @@ -1,9 +1,14 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package parseutils +package parseutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils" -import "fmt" +import ( + "fmt" + "strings" + + "go.uber.org/multierr" +) // SplitString will split the input on the delimiter and return the resulting slice while respecting quotes. Outer quotes are stripped. // Use in place of `strings.Split` when quotes need to be respected. @@ -47,3 +52,22 @@ func SplitString(input, delimiter string) ([]string, error) { return result, nil } + +// ParseKeyValuePairs will split each string in `pairs` on the `delimiter` into a key and value string that get added to a map and returned. +func ParseKeyValuePairs(pairs []string, delimiter string) (map[string]any, error) { + parsed := make(map[string]any) + var err error + for _, p := range pairs { + pair := strings.SplitN(p, delimiter, 2) + if len(pair) != 2 { + err = multierr.Append(err, fmt.Errorf("cannot split %q into 2 items, got %d item(s)", p, len(pair))) + continue + } + + key := strings.TrimSpace(pair[0]) + value := strings.TrimSpace(pair[1]) + + parsed[key] = value + } + return parsed, err +} diff --git a/internal/coreinternal/parseutils/parser_test.go b/internal/coreinternal/parseutils/parser_test.go index d78ac812604f..f4f8f4b14e5d 100644 --- a/internal/coreinternal/parseutils/parser_test.go +++ b/internal/coreinternal/parseutils/parser_test.go @@ -184,3 +184,93 @@ h`, }) } } + +func Test_ParseKeyValuePairs(t *testing.T) { + testCases := []struct { + name string + pairs []string + delimiter string + expected map[string]any + expectedErr error + }{ + { + name: "multiple delimiters", + pairs: []string{"a==b", "c=d=", "e=f"}, + delimiter: "=", + expected: map[string]any{ + "a": "=b", + "c": "d=", + "e": "f", + }, + }, + { + name: "no delimiter found", + pairs: []string{"ab"}, + delimiter: "=", + expectedErr: fmt.Errorf("cannot split \"ab\" into 2 items, got 1 item(s)"), + }, + { + name: "no delimiter found 2x", + pairs: []string{"ab", "cd"}, + delimiter: "=", + expectedErr: fmt.Errorf("cannot split \"ab\" into 2 items, got 1 item(s); cannot split \"cd\" into 2 items, got 1 item(s)"), + }, + { + name: "empty pairs", + pairs: []string{}, + delimiter: "=", + expected: map[string]any{}, + }, + { + name: "empty pair string", + pairs: []string{""}, + delimiter: "=", + expectedErr: fmt.Errorf("cannot split \"\" into 2 items, got 1 item(s)"), + }, + { + name: "empty delimiter", + pairs: []string{"a=b", "c=d"}, + delimiter: "", + expected: map[string]any{ + "a": "=b", + "c": "=d", + }, + }, + { + name: "empty pairs & delimiter", + pairs: []string{}, + delimiter: "", + expected: map[string]any{}, + }, + { + name: "early delimiter", + pairs: []string{"=a=b"}, + delimiter: "=", + expected: map[string]any{ + "": "a=b", + }, + }, + { + name: "weird spacing", + pairs: []string{" a= b ", " c = d "}, + delimiter: "=", + expected: map[string]any{ + "a": "b", + "c": "d", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := ParseKeyValuePairs(tc.pairs, tc.delimiter) + + if tc.expectedErr == nil { + assert.NoError(t, err) + assert.Equal(t, tc.expected, result) + } else { + assert.EqualError(t, err, tc.expectedErr.Error()) + } + }) + } +} diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index d99125af2868..794d1f411387 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -847,7 +847,7 @@ Examples: The `ParseKeyValue` Converter returns a `pcommon.Map` that is a result of parsing the target string for key value pairs. -`target` is a Getter that returns a string. `delimiter` is an optional string that is used to split the key and value in a pair, the default is `=`. `pair_delimiter` is an optional string that is used to split key value pairs, the default is a single space (` `). +`target` is a Getter that returns a string. If the returned string is empty, an error will be returned. `delimiter` is an optional string that is used to split the key and value in a pair, the default is `=`. `pair_delimiter` is an optional string that is used to split key value pairs, the default is a single space (` `). For example, the following target `"k1=v1 k2=v2 k3=v3"` will use default delimiters and be parsed into the following map: ``` diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value.go b/pkg/ottl/ottlfuncs/func_parse_key_value.go index 71a6c6eab7df..1b896656ebe2 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value.go @@ -1,12 +1,11 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package ottlfuncs +package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" import ( "context" "fmt" - "strings" "go.opentelemetry.io/collector/pdata/pcommon" @@ -70,15 +69,9 @@ func parseKeyValue[K any](target ottl.StringGetter[K], d ottl.Optional[string], return nil, fmt.Errorf("splitting source %q into pairs failed: %w", source, err) } - parsed := make(map[string]any) - for _, p := range pairs { - pair := strings.SplitN(p, delimiter, 2) - if len(pair) != 2 { - return nil, fmt.Errorf("cannot split %q into 2 items, got %d item(s)", p, len(pair)) - } - key := strings.TrimSpace(pair[0]) - value := strings.TrimSpace(pair[1]) - parsed[key] = value + parsed, err := parseutils.ParseKeyValuePairs(pairs, delimiter) + if err != nil { + return nil, fmt.Errorf("failed to split pairs into key-values: %w", err) } result := pcommon.NewMap() diff --git a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go index 77ad9a8c9431..340c29b8300a 100644 --- a/pkg/ottl/ottlfuncs/func_parse_key_value_test.go +++ b/pkg/ottl/ottlfuncs/func_parse_key_value_test.go @@ -330,7 +330,7 @@ func Test_parseKeyValue_bad_split(t *testing.T) { exprFunc, err := parseKeyValue[any](target, delimiter, pairDelimiter) assert.NoError(t, err) _, err = exprFunc(context.Background(), nil) - assert.ErrorContains(t, err, "cannot split \"hello_world\" into 2 items, got 1 item(s)") + assert.ErrorContains(t, err, "failed to split pairs into key-values: cannot split \"hello_world\" into 2 items, got 1 item(s)") } func Test_parseKeyValue_mismatch_quotes(t *testing.T) { @@ -357,7 +357,7 @@ func Test_parseKeyValue_bad_delimiter(t *testing.T) { exprFunc, err := parseKeyValue[any](target, delimiter, ottl.Optional[string]{}) assert.NoError(t, err) _, err = exprFunc(context.Background(), nil) - assert.ErrorContains(t, err, "cannot split \"a=b\" into 2 items, got 1 item(s)") + assert.ErrorContains(t, err, "failed to split pairs into key-values: cannot split \"a=b\" into 2 items, got 1 item(s)") } func Test_parseKeyValue_empty_delimiters(t *testing.T) {