diff --git a/.chloggen/pkg-stanza-fix-key-value-pair-delimiter-bug.yaml b/.chloggen/pkg-stanza-fix-key-value-pair-delimiter-bug.yaml new file mode 100644 index 000000000000..d2e50b12d9ce --- /dev/null +++ b/.chloggen/pkg-stanza-fix-key-value-pair-delimiter-bug.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/stanza + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fixed a bug in the keyvalue_parser where quoted values could be split if they contained a delimited. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [31034] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/pkg/stanza/operator/parser/keyvalue/keyvalue.go b/pkg/stanza/operator/parser/keyvalue/keyvalue.go index 7655939bdb26..7598aafdeb73 100644 --- a/pkg/stanza/operator/parser/keyvalue/keyvalue.go +++ b/pkg/stanza/operator/parser/keyvalue/keyvalue.go @@ -51,27 +51,23 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) { return nil, err } - if c.Delimiter == c.PairDelimiter { - return nil, errors.New("delimiter and pair_delimiter cannot be the same value") - } - if c.Delimiter == "" { return nil, errors.New("delimiter is a required parameter") } - // split on whitespace by default, if pair delimiter is set, use - // strings.Split() - pairSplitFunc := splitStringByWhitespace + pairDelimiter := " " if c.PairDelimiter != "" { - pairSplitFunc = func(input string) []string { - return strings.Split(input, c.PairDelimiter) - } + pairDelimiter = c.PairDelimiter + } + + if c.Delimiter == pairDelimiter { + return nil, errors.New("delimiter and pair_delimiter cannot be the same value") } return &Parser{ ParserOperator: parserOperator, delimiter: c.Delimiter, - pairSplitFunc: pairSplitFunc, + pairDelimiter: pairDelimiter, }, nil } @@ -79,7 +75,7 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) { type Parser struct { helper.ParserOperator delimiter string - pairSplitFunc func(input string) []string + pairDelimiter string } // Process will parse an entry for key value pairs. @@ -91,21 +87,25 @@ func (kv *Parser) Process(ctx context.Context, entry *entry.Entry) error { func (kv *Parser) parse(value any) (any, error) { switch m := value.(type) { case string: - return kv.parser(m, kv.delimiter) + return kv.parser(m, kv.delimiter, kv.pairDelimiter) default: return nil, fmt.Errorf("type %T cannot be parsed as key value pairs", value) } } -func (kv *Parser) parser(input string, delimiter string) (map[string]any, error) { +func (kv *Parser) parser(input string, delimiter string, pairDelimiter string) (map[string]any, error) { if input == "" { return nil, fmt.Errorf("parse from field %s is empty", kv.ParseFrom.String()) } + pairs, err := splitPairs(input, pairDelimiter) + if err != nil { + return nil, fmt.Errorf("failed to parse pairs from input: %w", err) + } + parsed := make(map[string]any) - var err error - for _, raw := range kv.pairSplitFunc(input) { + for _, raw := range pairs { m := strings.SplitN(raw, delimiter, 2) if len(m) != 2 { e := fmt.Errorf("expected '%s' to split by '%s' into two items, got %d", raw, delimiter, len(m)) @@ -113,8 +113,8 @@ func (kv *Parser) parser(input string, delimiter string) (map[string]any, error) continue } - key := strings.TrimSpace(strings.Trim(m[0], "\"'")) - value := strings.TrimSpace(strings.Trim(m[1], "\"'")) + key := strings.TrimSpace(m[0]) + value := strings.TrimSpace(m[1]) parsed[key] = value } @@ -122,14 +122,43 @@ func (kv *Parser) parser(input string, delimiter string) (map[string]any, error) return parsed, err } -// split on whitespace and preserve quoted text -func splitStringByWhitespace(input string) []string { - quoted := false - raw := strings.FieldsFunc(input, func(r rune) bool { - if r == '"' || r == '\'' { - quoted = !quoted +// splitPairs will split the input on the pairDelimiter and return the resulting slice. +// `strings.Split` is not used because it does not respect quotes and will split if the delimiter appears in a quoted value +func splitPairs(input, pairDelimiter string) ([]string, error) { + var result []string + currentPair := "" + delimiterLength := len(pairDelimiter) + quoteChar := "" // "" means we are not in quotes + + for i := 0; i < len(input); i++ { + if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == pairDelimiter { // delimiter + if currentPair == "" { // leading || trailing delimiter; ignore + continue + } + result = append(result, currentPair) + currentPair = "" + i += delimiterLength - 1 + continue + } + + if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote + quoteChar = string(input[i]) + continue } - return !quoted && r == ' ' - }) - return raw + if string(input[i]) == quoteChar { // end of quote + quoteChar = "" + continue + } + + currentPair += string(input[i]) + } + + if quoteChar != "" { // check for closed quotes + return nil, fmt.Errorf("never reached end of a quoted value") + } + if currentPair != "" { // avoid adding empty value bc of a trailing delimiter + return append(result, currentPair), nil + } + + return result, nil } diff --git a/pkg/stanza/operator/parser/keyvalue/keyvalue_test.go b/pkg/stanza/operator/parser/keyvalue/keyvalue_test.go index 5c02127d1de8..41cf7593d577 100644 --- a/pkg/stanza/operator/parser/keyvalue/keyvalue_test.go +++ b/pkg/stanza/operator/parser/keyvalue/keyvalue_test.go @@ -109,6 +109,15 @@ func TestBuild(t *testing.T) { }(), true, }, + { + "pair-delimiter-equals-default-delimiter", + func() *Config { + cfg := basicConfig() + cfg.Delimiter = " " + return cfg + }(), + true, + }, { "unset-delimiter", func() *Config { @@ -148,6 +157,13 @@ func TestParserInvalidType(t *testing.T) { require.Contains(t, err.Error(), "type []int cannot be parsed as key value pairs") } +func TestParserEmptyInput(t *testing.T) { + parser := newTestParser(t) + _, err := parser.parse("") + require.Error(t, err) + require.Contains(t, err.Error(), "parse from field body is empty") +} + func TestKVImplementations(t *testing.T) { require.Implements(t, (*operator.Operator)(nil), new(Parser)) } @@ -503,7 +519,7 @@ key=value`, false, }, { - "quoted-value-contains-delimiter", + "quoted-value-contains-whitespace-delimiter", func(kv *Config) {}, &entry.Entry{ Body: `msg="Message successfully sent at 2023-12-04 06:47:31.204222276 +0000 UTC m=+5115.932279346"`, @@ -572,6 +588,105 @@ key=value`, false, true, }, + { + "custom pair delimiter in quoted value", + func(kv *Config) { + kv.PairDelimiter = "_" + }, + &entry.Entry{ + Body: `a=b_c="d_e"`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "a": "b", + "c": "d_e", + }, + Body: `a=b_c="d_e"`, + }, + false, + false, + }, + { + "embedded double quotes in single quoted value", + func(kv *Config) {}, + &entry.Entry{ + Body: `a=b c='this is a "co ol" value'`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "a": "b", + "c": "this is a \"co ol\" value", + }, + Body: `a=b c='this is a "co ol" value'`, + }, + false, + false, + }, + { + "embedded double quotes end single quoted value", + func(kv *Config) {}, + &entry.Entry{ + Body: `a=b c='this is a "co ol"'`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "a": "b", + "c": "this is a \"co ol\"", + }, + Body: `a=b c='this is a "co ol"'`, + }, + false, + false, + }, + { + "leading and trailing pair delimiter w/o quotes", + func(kv *Config) {}, + &entry.Entry{ + Body: " k1=v1 k2==v2 k3=v3= ", + }, + &entry.Entry{ + Attributes: map[string]any{ + "k1": "v1", + "k2": "=v2", + "k3": "v3=", + }, + Body: " k1=v1 k2==v2 k3=v3= ", + }, + false, + false, + }, + { + "complicated delimiters", + func(kv *Config) { + kv.Delimiter = "@*" + kv.PairDelimiter = "_!_" + }, + &entry.Entry{ + Body: `k1@*v1_!_k2@**v2_!__k3@@*v3__`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "k1": "v1", + "k2": "*v2", + "_k3@": "v3__", + }, + Body: `k1@*v1_!_k2@**v2_!__k3@@*v3__`, + }, + false, + false, + }, + { + "unclosed quotes", + func(kv *Config) {}, + &entry.Entry{ + Body: `k1='v1' k2='v2`, + }, + &entry.Entry{ + Body: `k1='v1' k2='v2`, + }, + true, + false, + }, } for _, tc := range cases { @@ -604,28 +719,3 @@ key=value`, }) } } - -func TestSplitStringByWhitespace(t *testing.T) { - cases := []struct { - name string - intput string - output []string - }{ - { - "simple", - "k=v a=b x=\" y \" job=\"software engineering\"", - []string{ - "k=v", - "a=b", - "x=\" y \"", - "job=\"software engineering\"", - }, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - require.Equal(t, tc.output, splitStringByWhitespace(tc.intput)) - }) - } -}