From 455c0535f17a4ddcdb1c2a0bfe257020093baff7 Mon Sep 17 00:00:00 2001 From: jose Date: Wed, 6 Sep 2023 09:06:51 -0400 Subject: [PATCH 1/6] adding ndjson format --- cli/cmd/transformCmd.go | 27 +++++++++++++++++++++++---- header/header.go | 1 + validation/parserSettings.go | 3 ++- validation/parserSettings.json | 3 ++- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/cli/cmd/transformCmd.go b/cli/cmd/transformCmd.go index 4e6ed4b..11ec23f 100644 --- a/cli/cmd/transformCmd.go +++ b/cli/cmd/transformCmd.go @@ -86,6 +86,11 @@ func doTransform() error { if err != nil { return "", err } + + if schema.Header().ParserSettings.NDJSON { + return string(b), nil + } + return strings.Join( strs.NoErrMapSlice( strings.Split(jsons.BPJ(string(b)), "\n"), @@ -95,13 +100,27 @@ func doTransform() error { record, err := doOne() if err == io.EOF { - fmt.Println("[]") + if schema.Header().ParserSettings.NDJSON { + fmt.Println("") + } else { + fmt.Println("[]") + } return nil } if err != nil { return err } - fmt.Printf("[\n%s", record) + + start := "[\n%s" + middle := ",\n%s" + end := "\n]" + if schema.Header().ParserSettings.NDJSON { + start = "%s" + middle = "\n%s" + end = "" + } + + fmt.Printf(start, record) for { record, err = doOne() if err == io.EOF { @@ -110,8 +129,8 @@ func doTransform() error { if err != nil { return err } - fmt.Printf(",\n%s", record) + fmt.Printf(middle, record) } - fmt.Println("\n]") + fmt.Println(end) return nil } diff --git a/header/header.go b/header/header.go index ed4c173..5be79ed 100644 --- a/header/header.go +++ b/header/header.go @@ -15,6 +15,7 @@ type ParserSettings struct { Version string `json:"version,omitempty"` FileFormatType string `json:"file_format_type,omitempty"` Encoding *string `json:"encoding,omitempty"` + NDJSON bool `json:"ndjson,omitempty"` } const ( diff --git a/validation/parserSettings.go b/validation/parserSettings.go index 52c1ccf..bd4ecc1 100644 --- a/validation/parserSettings.go +++ b/validation/parserSettings.go @@ -19,7 +19,8 @@ const ( "encoding": { "type": "string", "enum": [ "utf-8", "iso-8859-1", "windows-1252" ] - } + }, + "ndjson": { "type": "boolean" } }, "required": [ "version", "file_format_type" ], "additionalProperties": false diff --git a/validation/parserSettings.json b/validation/parserSettings.json index 0a3849f..92ae3a1 100644 --- a/validation/parserSettings.json +++ b/validation/parserSettings.json @@ -12,7 +12,8 @@ "encoding": { "type": "string", "enum": [ "utf-8", "iso-8859-1", "windows-1252" ] - } + }, + "ndjson": { "type": "boolean" } }, "required": [ "version", "file_format_type" ], "additionalProperties": false From 188e01d93738609a1f0555f3d100814f1b068ea5 Mon Sep 17 00:00:00 2001 From: jose Date: Mon, 11 Sep 2023 14:00:26 -0400 Subject: [PATCH 2/6] changing to use command line switch --- cli/cmd/transformCmd.go | 9 ++++++--- header/header.go | 1 - validation/parserSettings.go | 3 +-- validation/parserSettings.json | 3 +-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cli/cmd/transformCmd.go b/cli/cmd/transformCmd.go index 11ec23f..9ef7c21 100644 --- a/cli/cmd/transformCmd.go +++ b/cli/cmd/transformCmd.go @@ -31,6 +31,7 @@ var ( } schema string input string + ndjson bool ) func init() { @@ -39,6 +40,8 @@ func init() { transformCmd.Flags().StringVarP( &input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)") + transformCmd.Flags().BoolVarP( + &ndjson, "ndjson", "", false, "change the output format to ndjson") } func openFile(label string, filepath string) (io.ReadCloser, error) { @@ -87,7 +90,7 @@ func doTransform() error { return "", err } - if schema.Header().ParserSettings.NDJSON { + if ndjson { return string(b), nil } @@ -100,7 +103,7 @@ func doTransform() error { record, err := doOne() if err == io.EOF { - if schema.Header().ParserSettings.NDJSON { + if ndjson { fmt.Println("") } else { fmt.Println("[]") @@ -114,7 +117,7 @@ func doTransform() error { start := "[\n%s" middle := ",\n%s" end := "\n]" - if schema.Header().ParserSettings.NDJSON { + if ndjson { start = "%s" middle = "\n%s" end = "" diff --git a/header/header.go b/header/header.go index 5be79ed..ed4c173 100644 --- a/header/header.go +++ b/header/header.go @@ -15,7 +15,6 @@ type ParserSettings struct { Version string `json:"version,omitempty"` FileFormatType string `json:"file_format_type,omitempty"` Encoding *string `json:"encoding,omitempty"` - NDJSON bool `json:"ndjson,omitempty"` } const ( diff --git a/validation/parserSettings.go b/validation/parserSettings.go index bd4ecc1..52c1ccf 100644 --- a/validation/parserSettings.go +++ b/validation/parserSettings.go @@ -19,8 +19,7 @@ const ( "encoding": { "type": "string", "enum": [ "utf-8", "iso-8859-1", "windows-1252" ] - }, - "ndjson": { "type": "boolean" } + } }, "required": [ "version", "file_format_type" ], "additionalProperties": false diff --git a/validation/parserSettings.json b/validation/parserSettings.json index 92ae3a1..0a3849f 100644 --- a/validation/parserSettings.json +++ b/validation/parserSettings.json @@ -12,8 +12,7 @@ "encoding": { "type": "string", "enum": [ "utf-8", "iso-8859-1", "windows-1252" ] - }, - "ndjson": { "type": "boolean" } + } }, "required": [ "version", "file_format_type" ], "additionalProperties": false From 8cfd180114beddb8bf7095b628cba49cc50deaa0 Mon Sep 17 00:00:00 2001 From: jose Date: Wed, 20 Sep 2023 13:52:26 -0400 Subject: [PATCH 3/6] cr comments --- cli/cmd/transformCmd.go | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/cli/cmd/transformCmd.go b/cli/cmd/transformCmd.go index 9ef7c21..0fb5ad8 100644 --- a/cli/cmd/transformCmd.go +++ b/cli/cmd/transformCmd.go @@ -41,7 +41,7 @@ func init() { transformCmd.Flags().StringVarP( &input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)") transformCmd.Flags().BoolVarP( - &ndjson, "ndjson", "", false, "change the output format to ndjson") + &ndjson, "stream", "", false, "change the output format to ndjson") } func openFile(label string, filepath string) (io.ReadCloser, error) { @@ -90,13 +90,14 @@ func doTransform() error { return "", err } + s := string(b) if ndjson { - return string(b), nil + return s, nil } return strings.Join( strs.NoErrMapSlice( - strings.Split(jsons.BPJ(string(b)), "\n"), + strings.Split(jsons.BPJ(s), "\n"), func(s string) string { return "\t" + s }), "\n"), nil } @@ -114,16 +115,16 @@ func doTransform() error { return err } - start := "[\n%s" - middle := ",\n%s" - end := "\n]" + lparen := "[\n%s" + delim := ",\n%s" + rparen := "\n]" if ndjson { - start = "%s" - middle = "\n%s" - end = "" + lparen = "%s" + delim = "\n%s" + rparen = "" } - fmt.Printf(start, record) + fmt.Printf(lparen, record) for { record, err = doOne() if err == io.EOF { @@ -132,8 +133,8 @@ func doTransform() error { if err != nil { return err } - fmt.Printf(middle, record) + fmt.Printf(delim, record) } - fmt.Println(end) + fmt.Println(rparen) return nil } From c362e554bad5585c9540a8a4dde5f05909530409 Mon Sep 17 00:00:00 2001 From: jose Date: Wed, 4 Oct 2023 11:41:50 -0400 Subject: [PATCH 4/6] changing bool from ndjson to stream --- cli/cmd/transformCmd.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cli/cmd/transformCmd.go b/cli/cmd/transformCmd.go index 0fb5ad8..0536d6d 100644 --- a/cli/cmd/transformCmd.go +++ b/cli/cmd/transformCmd.go @@ -31,7 +31,7 @@ var ( } schema string input string - ndjson bool + stream bool ) func init() { @@ -41,7 +41,7 @@ func init() { transformCmd.Flags().StringVarP( &input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)") transformCmd.Flags().BoolVarP( - &ndjson, "stream", "", false, "change the output format to ndjson") + &stream, "stream", "", false, "change the output format to ndjson") } func openFile(label string, filepath string) (io.ReadCloser, error) { @@ -91,7 +91,7 @@ func doTransform() error { } s := string(b) - if ndjson { + if stream { return s, nil } @@ -104,7 +104,7 @@ func doTransform() error { record, err := doOne() if err == io.EOF { - if ndjson { + if stream { fmt.Println("") } else { fmt.Println("[]") @@ -118,7 +118,7 @@ func doTransform() error { lparen := "[\n%s" delim := ",\n%s" rparen := "\n]" - if ndjson { + if stream { lparen = "%s" delim = "\n%s" rparen = "" From bcf00e73d3c4161b19cf3d2312330fdaf88a807e Mon Sep 17 00:00:00 2001 From: jose Date: Wed, 4 Oct 2023 11:42:32 -0400 Subject: [PATCH 5/6] no new line for empty json if streaming --- cli/cmd/transformCmd.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cli/cmd/transformCmd.go b/cli/cmd/transformCmd.go index 0536d6d..7d620ac 100644 --- a/cli/cmd/transformCmd.go +++ b/cli/cmd/transformCmd.go @@ -104,9 +104,7 @@ func doTransform() error { record, err := doOne() if err == io.EOF { - if stream { - fmt.Println("") - } else { + if !stream { fmt.Println("[]") } return nil From b26cb6d3b1ab164d6012a9aa7696d55b657ffef5 Mon Sep 17 00:00:00 2001 From: jose Date: Mon, 9 Oct 2023 12:21:12 -0400 Subject: [PATCH 6/6] changed help text --- cli/cmd/transformCmd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/cmd/transformCmd.go b/cli/cmd/transformCmd.go index 7d620ac..63170ae 100644 --- a/cli/cmd/transformCmd.go +++ b/cli/cmd/transformCmd.go @@ -41,7 +41,7 @@ func init() { transformCmd.Flags().StringVarP( &input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)") transformCmd.Flags().BoolVarP( - &stream, "stream", "", false, "change the output format to ndjson") + &stream, "stream", "", false, "if specified, each record will be a standalone/full JSON blob and printed out immediately once transform is done") } func openFile(label string, filepath string) (io.ReadCloser, error) {