diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 2f996756908..88c9c2099f8 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -208,6 +208,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - [Azure] Add input metrics to the azure-eventhub input. {pull}35739[35739] - Reduce HTTPJSON metrics allocations. {pull}36282[36282] - Add support for a simplified input configuraton when running under Elastic-Agent {pull}36390[36390] +- Make HTTPJSON response body decoding errors more informative. {pull}36481[36481] *Auditbeat* diff --git a/x-pack/filebeat/input/httpjson/encoding.go b/x-pack/filebeat/input/httpjson/encoding.go index 7bf85161789..2867c5c8cd5 100644 --- a/x-pack/filebeat/input/httpjson/encoding.go +++ b/x-pack/filebeat/input/httpjson/encoding.go @@ -9,9 +9,12 @@ import ( "bytes" "encoding/csv" "encoding/json" + stdxml "encoding/xml" "errors" + "fmt" "io" "net/http" + "unicode" "github.com/elastic/mito/lib/xml" ) @@ -72,7 +75,11 @@ func encodeAsJSON(trReq transformable) ([]byte, error) { // decodeAsJSON decodes the JSON message in p into dst. func decodeAsJSON(p []byte, dst *response) error { - return json.Unmarshal(p, &dst.body) + err := json.Unmarshal(p, &dst.body) + if err != nil { + return jsonError{error: err, body: p} + } + return nil } // encodeAsForm encodes trReq as a URL encoded form. 
@@ -95,7 +102,7 @@ func decodeAsNdjson(p []byte, dst *response) error { for dec.More() { var o interface{} if err := dec.Decode(&o); err != nil { - return err + return jsonError{error: err, body: p} } results = append(results, o) } @@ -103,6 +110,28 @@ func decodeAsNdjson(p []byte, dst *response) error { return nil } +type jsonError struct { + error + body []byte +} + +func (e jsonError) Error() string { + switch err := e.error.(type) { + case nil: + return "" + case *json.SyntaxError: + return fmt.Sprintf("%v: text context %q", err, textContext(e.body, err.Offset)) + case *json.UnmarshalTypeError: + return fmt.Sprintf("%v: text context %q", err, textContext(e.body, err.Offset)) + default: + return err.Error() + } +} + +func (e jsonError) Unwrap() error { + return e.error +} + // decodeAsCSV decodes p as a headed CSV document into dst. func decodeAsCSV(p []byte, dst *response) error { var results []interface{} @@ -135,7 +164,7 @@ func decodeAsCSV(p []byte, dst *response) error { if err != nil { if err != io.EOF { //nolint:errorlint // csv.Reader never wraps io.EOF. - return err + return csvError{error: err, body: p} } } @@ -144,6 +173,31 @@ func decodeAsCSV(p []byte, dst *response) error { return nil } +type csvError struct { + error + body []byte +} + +func (e csvError) Error() string { + switch err := e.error.(type) { + case nil: + return "" + case *csv.ParseError: + lines := bytes.Split(e.body, []byte{'\n'}) + l := err.Line - 1 // Lines are 1-based. + if uint(l) >= uint(len(lines)) { + return err.Error() + } + return fmt.Sprintf("%v: text context %q", err, textContext(lines[l], int64(err.Column))) + default: + return err.Error() + } +} + +func (e csvError) Unwrap() error { + return e.error +} + // decodeAsZip decodes p as a ZIP archive into dst. 
func decodeAsZip(p []byte, dst *response) error { var results []interface{} @@ -165,7 +219,7 @@ func decodeAsZip(p []byte, dst *response) error { var o interface{} if err := dec.Decode(&o); err != nil { rc.Close() - return err + return jsonError{error: err, body: p} } results = append(results, o) } @@ -185,9 +239,81 @@ func decodeAsZip(p []byte, dst *response) error { func decodeAsXML(p []byte, dst *response) error { cdata, body, err := xml.Unmarshal(bytes.NewReader(p), dst.xmlDetails) if err != nil { - return err + return xmlError{error: err, body: p} } dst.body = body dst.header["XML-CDATA"] = []string{cdata} return nil } + +type xmlError struct { + error + body []byte +} + +func (e xmlError) Error() string { + switch err := e.error.(type) { + case nil: + return "" + case *stdxml.SyntaxError: + lines := bytes.Split(e.body, []byte{'\n'}) + l := err.Line - 1 // Lines are 1-based. + if uint(l) >= uint(len(lines)) { + return err.Error() + } + // The xml package does not provide column-level context, + // so just point to first non-whitespace character of the + // line. This doesn't make a great deal of difference + // except in deeply indented XML documents. + pos := bytes.IndexFunc(lines[l], func(r rune) bool { + return !unicode.IsSpace(r) + }) + if pos < 0 { + pos = 0 + } + return fmt.Sprintf("%v: text context %q", err, textContext(lines[l], int64(pos))) + default: + return err.Error() + } +} + +func (e xmlError) Unwrap() error { + return e.error +} + +// textContext returns the context of text around the provided position starting +// five bytes before pos and extending ten bytes, dependent on the length of the +// text and the value of pos relative to bounds. If a text truncation is made, +// an ellipsis is added to indicate this. The returned []byte should not be mutated +// as it may be shared with the caller. 
// textContext returns a short excerpt of text around pos for use in
// decoding error messages.
//
// The excerpt is text[left:right], where left is five bytes before pos and
// right is pos+10 bytes after left, each bounded by the ends of text. An
// ellipsis is prepended when left is not the start of text and appended
// when right is not the end of text.
//
// pos may lie outside text: it is bounded before use, so a position that
// does not belong to text (for example an offset into a different stream
// than the one text was taken from) yields a truncated excerpt rather than
// a slice-bounds panic.
//
// text is never modified. When no truncation is needed, text itself is
// returned, so the result must not be mutated by the caller.
//
// NOTE(review): right is measured from left rather than from pos, so the
// excerpt lengthens as pos grows. This is kept as is because existing
// expected error messages depend on it.
func textContext(text []byte, pos int64) []byte {
	const ellipsis = "..."
	size := int64(len(text))

	// Bound pos so that left and right stay within text and the sums
	// below cannot overflow. -10 and size+5 are the extreme values for
	// which the window is already empty at the start or end of text.
	pos = maxInt64(-10, minInt(pos, size+5))

	left := maxInt64(0, pos-5)
	right := minInt(left+pos+10, size)
	if left == 0 && right == size {
		// Nothing is cut off, so no copy is needed.
		return text
	}

	// Copy only the window so that the caller's bytes are not clobbered
	// and a large body is not duplicated to report a few bytes of it.
	ctx := make([]byte, 0, int(right-left)+2*len(ellipsis))
	if left != 0 {
		ctx = append(ctx, ellipsis...)
	}
	ctx = append(ctx, text[left:right]...)
	if right != size {
		ctx = append(ctx, ellipsis...)
	}
	return ctx
}

// minInt returns the smaller of the int64 values a and b.
func minInt(a, b int64) int64 {
	if a < b {
		return a
	}
	return b
}

// maxInt64 returns the larger of the int64 values a and b.
func maxInt64(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}
body string result string + err string }{ - {"{}", "[{}]"}, - {"{\"a\":\"b\"}", "[{\"a\":\"b\"}]"}, - {"{\"a\":\"b\"}\n{\"c\":\"d\"}", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, - {"{\"a\":\"b\"}\r\n{\"c\":\"d\"}", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, - {"{\"a\":\"b\"}\r\n{\"c\":\"d\"}\n", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, - {"{\"a\":\"b\"}\r\n{\"c\":\"d\"}\r\n", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, + { + body: "{}", + result: "[{}]", + }, + { + body: "{\"a\":\"b\"}", + result: "[{\"a\":\"b\"}]", + }, + { + body: "{\"a\":\"b\"}\n{\"c\":\"d\"}", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}\r\n{\"c\":\"d\"}", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}\r\n{\"c\":\"d\"}\n", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}\r\n{\"c\":\"d\"}\r\n", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}unfortunate text\r\n{\"c\":\"d\"}\r\n", + err: `invalid character 'u' looking for beginning of value: text context "...\"b\"}unfortunate text..."`, + }, } for _, test := range tests { resp := &response{} err := decodeAsNdjson([]byte(test.body), resp) - if err != nil { - t.Fatalf("decodeAsNdjson failed: %v", err) - } - j, err := json.Marshal(resp.body) - if err != nil { - t.Fatalf("Marshal failed: %v", err) + if test.err != "" { + assert.Error(t, err) + assert.EqualError(t, err, test.err) + } else { + assert.NoError(t, err) + + var j []byte + if test.body != "" { + j, err = json.Marshal(resp.body) + if err != nil { + t.Fatalf("Marshal failed: %v", err) + } + assert.JSONEq(t, test.result, string(j)) + } else { + assert.Equal(t, test.result, string(j)) + } } - assert.Equal(t, test.result, string(j)) } } @@ -93,20 +168,18 @@ func TestDecodeCSV(t *testing.T) { result string err string }{ - {"", "", ""}, + {body: "", result: ""}, { - "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + + body: "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + 
"Login,20211018071353.465,id1,id2,user1\n" + "Login,20211018071505.579,id4,id5,user2\n", - `[{"EVENT_TYPE":"Login","TIMESTAMP":"20211018071353.465","REQUEST_ID":"id1","ORGANIZATION_ID":"id2","USER_ID":"user1"}, + result: `[{"EVENT_TYPE":"Login","TIMESTAMP":"20211018071353.465","REQUEST_ID":"id1","ORGANIZATION_ID":"id2","USER_ID":"user1"}, {"EVENT_TYPE":"Login","TIMESTAMP":"20211018071505.579","REQUEST_ID":"id4","ORGANIZATION_ID":"id5","USER_ID":"user2"}]`, - "", }, { - "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + + body: "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + "Login,20211018071505.579,id4,user2\n", - "", - "record on line 2: wrong number of fields", + err: "record on line 2: wrong number of fields: text context \"Login,20211...\"", }, } for _, test := range tests { @@ -132,6 +205,62 @@ func TestDecodeCSV(t *testing.T) { } } +func TestDecodeXML(t *testing.T) { + tests := []struct { + body string + result string + err string + }{ + { + body: ` + +

+ Joord Lennart +

+ + Egil's Saga + +
+`, + result: `{"o":{"p":{"n":"Joord Lennart"},"i":{"n":"Egil's Saga"}}}`, + }, + { + body: ` + +

+ Joord Lennart +

+ + Egil's Saga + +
+`, + err: `XML syntax error on line 7: element closed by : text context "... Egil's Saga"`, + }, + } + for _, test := range tests { + resp := &response{header: make(http.Header)} + err := decodeAsXML([]byte(test.body), resp) + if test.err != "" { + assert.Error(t, err) + assert.EqualError(t, err, test.err) + } else { + assert.NoError(t, err) + + var j []byte + if test.body != "" { + j, err = json.Marshal(resp.body) + if err != nil { + t.Fatalf("Marshal failed: %v", err) + } + assert.JSONEq(t, test.result, string(j)) + } else { + assert.Equal(t, test.result, string(j)) + } + } + } +} + func TestEncodeAsForm(t *testing.T) { tests := []struct { params map[string]string