Skip to content

Commit

Permalink
fix parsing bug and add more unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dpaasman00 committed Feb 5, 2024
1 parent 3952e23 commit 4036f1a
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 51 deletions.
81 changes: 56 additions & 25 deletions pkg/stanza/operator/parser/keyvalue/keyvalue.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,35 +51,31 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
return nil, err
}

if c.Delimiter == c.PairDelimiter {
return nil, errors.New("delimiter and pair_delimiter cannot be the same value")
}

if c.Delimiter == "" {
return nil, errors.New("delimiter is a required parameter")
}

// split on whitespace by default, if pair delimiter is set, use
// strings.Split()
pairSplitFunc := splitStringByWhitespace
pairDelimiter := " "
if c.PairDelimiter != "" {
pairSplitFunc = func(input string) []string {
return strings.Split(input, c.PairDelimiter)
}
pairDelimiter = c.PairDelimiter
}

if c.Delimiter == pairDelimiter {
return nil, errors.New("delimiter and pair_delimiter cannot be the same value")
}

return &Parser{
ParserOperator: parserOperator,
delimiter: c.Delimiter,
pairSplitFunc: pairSplitFunc,
pairDelimiter: pairDelimiter,
}, nil
}

// Parser is an operator that parses key value pairs.
type Parser struct {
helper.ParserOperator
// delimiter separates a key from its value inside a single pair; each pair
// is split on its first occurrence only.
delimiter string
// pairSplitFunc breaks the raw input string into individual key/value pairs.
pairSplitFunc func(input string) []string
// pairDelimiter separates one key/value pair from the next. Build sets it to
// a single space when the config leaves the pair delimiter empty.
pairDelimiter string
}

// Process will parse an entry for key value pairs.
Expand All @@ -91,21 +87,25 @@ func (kv *Parser) Process(ctx context.Context, entry *entry.Entry) error {
// parse converts value into a map of key value pairs. Only string values are
// supported; any other dynamic type is rejected with an error that names the
// offending type.
func (kv *Parser) parse(value any) (any, error) {
switch m := value.(type) {
case string:
// NOTE(review): the two consecutive returns below look like the removed and
// added sides of this diff hunk. Only the three-argument call matches the
// parser signature that takes a pairDelimiter — confirm which one the file keeps.
return kv.parser(m, kv.delimiter)
return kv.parser(m, kv.delimiter, kv.pairDelimiter)
default:
return nil, fmt.Errorf("type %T cannot be parsed as key value pairs", value)
}
}

func (kv *Parser) parser(input string, delimiter string) (map[string]any, error) {
func (kv *Parser) parser(input string, delimiter string, pairDelimiter string) (map[string]any, error) {
if input == "" {
return nil, fmt.Errorf("parse from field %s is empty", kv.ParseFrom.String())
}

pairs, err := splitPairs(input, pairDelimiter)
if err != nil {
return nil, fmt.Errorf("failed to parse pairs from input: %w", err)
}

parsed := make(map[string]any)

var err error
for _, raw := range kv.pairSplitFunc(input) {
for _, raw := range pairs {
m := strings.SplitN(raw, delimiter, 2)
if len(m) != 2 {
e := fmt.Errorf("expected '%s' to split by '%s' into two items, got %d", raw, delimiter, len(m))
Expand All @@ -122,14 +122,45 @@ func (kv *Parser) parser(input string, delimiter string) (map[string]any, error)
return parsed, err
}

// split on whitespace and preserve quoted text
func splitStringByWhitespace(input string) []string {
quoted := false
raw := strings.FieldsFunc(input, func(r rune) bool {
if r == '"' || r == '\'' {
quoted = !quoted
// splitPairs splits input on pairDelimiter and returns the resulting non-empty
// pairs in the order they appear.
//
// strings.Split is not used because it does not respect quotes: a delimiter
// that appears inside a single- or double-quoted section must not split the
// pair. A quoted section ends at the next occurrence of the same quote
// character that opened it; the other quote character is treated as ordinary
// text while inside it. Quote characters are kept in the returned pairs.
//
// Leading, trailing and repeated delimiters never produce empty pairs. An
// empty pairDelimiter offers nothing to split on, so the whole input is
// returned as a single pair.
//
// An error is returned when the input ends inside a quoted section.
func splitPairs(input, pairDelimiter string) ([]string, error) {
	var (
		result    []string
		current   strings.Builder
		quoteChar byte // 0 means we are not inside quotes
	)
	delimiterLength := len(pairDelimiter)

	for i := 0; i < len(input); {
		if quoteChar == 0 && delimiterLength > 0 && strings.HasPrefix(input[i:], pairDelimiter) {
			// Only emit a pair when something was collected; this drops the
			// empty pieces around leading, trailing and repeated delimiters.
			if current.Len() > 0 {
				result = append(result, current.String())
				current.Reset()
			}
			// Always step over the whole delimiter. Advancing a single byte
			// would leak the rest of a multi-byte delimiter into the next pair.
			i += delimiterLength
			continue
		}

		c := input[i]
		if c == '"' || c == '\'' {
			switch quoteChar {
			case 0:
				quoteChar = c // opening quote
			case c:
				quoteChar = 0 // matching closing quote
			}
			// Any other quote character inside a quoted section is literal.
		}

		// Copy the raw byte. Converting a single byte to a string would
		// re-encode bytes >= 0x80 and corrupt multi-byte UTF-8 sequences.
		current.WriteByte(c)
		i++
	}

	if quoteChar != 0 {
		return nil, errors.New("never reached end of a quoted value")
	}

	if current.Len() > 0 {
		result = append(result, current.String())
	}

	return result, nil
}
107 changes: 81 additions & 26 deletions pkg/stanza/operator/parser/keyvalue/keyvalue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ func TestBuild(t *testing.T) {
}(),
true,
},
{
"pair-delimiter-equals-default-delimiter",
func() *Config {
cfg := basicConfig()
cfg.Delimiter = " "
return cfg
}(),
true,
},
{
"unset-delimiter",
func() *Config {
Expand Down Expand Up @@ -503,7 +512,7 @@ key=value`,
false,
},
{
"quoted-value-contains-delimiter",
"quoted-value-contains-whitespace-delimiter",
func(kv *Config) {},
&entry.Entry{
Body: `msg="Message successfully sent at 2023-12-04 06:47:31.204222276 +0000 UTC m=+5115.932279346"`,
Expand Down Expand Up @@ -572,6 +581,77 @@ key=value`,
false,
true,
},
{
"custom pair delimiter in quoted value",
func(kv *Config) {
kv.PairDelimiter = "_"
},
&entry.Entry{
Body: `a=b_c="d_e"`,
},
&entry.Entry{
Attributes: map[string]any{
"a": "b",
"c": "d_e",
},
Body: `a=b_c="d_e"`,
},
false,
false,
},
{
"embedded double quotes in single quoted value",
func(kv *Config) {},
&entry.Entry{
Body: `a=b c='this is a "co ol" value'`,
},
&entry.Entry{
Attributes: map[string]any{
"a": "b",
"c": "this is a \"co ol\" value",
},
Body: `a=b c='this is a "co ol" value'`,
},
false,
false,
},
{
"leading and trailing pair delimiter w/o quotes",
func(kv *Config) {},
&entry.Entry{
Body: " k1=v1 k2==v2 k3=v3= ",
},
&entry.Entry{
Attributes: map[string]any{
"k1": "v1",
"k2": "=v2",
"k3": "v3=",
},
Body: " k1=v1 k2==v2 k3=v3= ",
},
false,
false,
},
{
"complicated delimiters",
func(kv *Config) {
kv.Delimiter = "@*"
kv.PairDelimiter = "_!_"
},
&entry.Entry{
Body: `k1@*v1_!_k2@**v2_!__k3@@*v3__`,
},
&entry.Entry{
Attributes: map[string]any{
"k1": "v1",
"k2": "*v2",
"_k3@": "v3__",
},
Body: `k1@*v1_!_k2@**v2_!__k3@@*v3__`,
},
false,
false,
},
}

for _, tc := range cases {
Expand Down Expand Up @@ -604,28 +684,3 @@ key=value`,
})
}
}

func TestSplitStringByWhitespace(t *testing.T) {
cases := []struct {
name string
intput string
output []string
}{
{
"simple",
"k=v a=b x=\" y \" job=\"software engineering\"",
[]string{
"k=v",
"a=b",
"x=\" y \"",
"job=\"software engineering\"",
},
},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
require.Equal(t, tc.output, splitStringByWhitespace(tc.intput))
})
}
}

0 comments on commit 4036f1a

Please sign in to comment.