From 24bb58a1dae4e5563d2698a3af0866220f6d7a18 Mon Sep 17 00:00:00 2001 From: Tim Schaub Date: Wed, 4 Oct 2023 13:31:47 -0600 Subject: [PATCH] Include the row group count in describe output --- cmd/gpq/command/describe.go | 15 +++++++++------ cmd/gpq/command/describe_test.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/cmd/gpq/command/describe.go b/cmd/gpq/command/describe.go index 3e7dadb..13847e4 100644 --- a/cmd/gpq/command/describe.go +++ b/cmd/gpq/command/describe.go @@ -90,8 +90,9 @@ func (c *DescribeCmd) Run() error { fileMetadata := fileReader.MetaData() info := &DescribeInfo{ - Schema: buildSchema(fileReader, "", fileMetadata.Schema.Root()), - NumRows: fileMetadata.NumRows, + Schema: buildSchema(fileReader, "", fileMetadata.Schema.Root()), + NumRows: fileMetadata.NumRows, + NumRowGroups: int64(len(fileMetadata.RowGroups)), } metadata, geoErr := geoparquet.GetMetadata(fileMetadata.KeyValueMetadata()) @@ -213,6 +214,7 @@ func (c *DescribeCmd) formatText(info *DescribeInfo) error { footerConfig := table.RowConfig{AutoMerge: true, AutoMergeAlign: text.AlignLeft} tbl.AppendFooter(makeFooter("Rows", info.NumRows, header), footerConfig) + tbl.AppendFooter(makeFooter("Row Groups", info.NumRowGroups, header), footerConfig) if metadata != nil { version := metadata.Version if version == "" { @@ -257,10 +259,11 @@ func (c *DescribeCmd) formatJSON(info *DescribeInfo) error { } type DescribeInfo struct { - Schema *DescribeSchema `json:"schema"` - Metadata *geoparquet.Metadata `json:"metadata"` - NumRows int64 `json:"rows"` - Issues []string `json:"issues"` + Schema *DescribeSchema `json:"schema"` + Metadata *geoparquet.Metadata `json:"metadata"` + NumRows int64 `json:"rows"` + NumRowGroups int64 `json:"groups"` + Issues []string `json:"issues"` } type DescribeSchema struct { diff --git a/cmd/gpq/command/describe_test.go b/cmd/gpq/command/describe_test.go index 3622f48..c1682d4 100644 --- a/cmd/gpq/command/describe_test.go +++ b/cmd/gpq/command/describe_test.go @@ -3,6 +3,7 @@ package command_test import ( "encoding/json" + "github.com/apache/arrow/go/v14/parquet" "github.com/planetlabs/gpq/cmd/gpq/command" "github.com/planetlabs/gpq/internal/test" ) @@ -21,6 +22,7 @@ func (s *Suite) TestDescribe() { s.Require().NoError(err) s.Equal(int64(5), info.NumRows) + s.Equal(int64(1), info.NumRowGroups) s.Require().Len(info.Schema.Fields, 6) s.Equal("geometry", info.Schema.Fields[0].Name) @@ -59,6 +61,33 @@ func (s *Suite) TestDescribe() { s.Len(info.Issues, 0) } +func (s *Suite) TestDescribeNumRowGroups() { + s.writeStdin(test.ParquetFromJSON(s.T(), `[ + {"num": 0}, + {"num": 1}, + {"num": 2}, + {"num": 3}, + {"num": 4}, + {"num": 5}, + {"num": 6}, + {"num": 7} + ]`, parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(2)))) + + cmd := &command.DescribeCmd{ + Format: "json", + } + + s.Require().NoError(cmd.Run()) + + output := s.readStdout() + info := &command.DescribeInfo{} + err := json.Unmarshal(output, info) + s.Require().NoError(err) + + s.Equal(int64(8), info.NumRows) + s.Equal(int64(4), info.NumRowGroups) +} + func (s *Suite) TestDescribeFromStdin() { s.writeStdin(test.GeoParquetFromJSON(s.T(), `{ "type": "FeatureCollection", @@ -88,6 +117,7 @@ func (s *Suite) TestDescribeFromStdin() { s.Require().NoError(err) s.Equal(int64(1), info.NumRows) + s.Equal(int64(1), info.NumRowGroups) s.Require().Len(info.Schema.Fields, 2) s.Equal("geometry", info.Schema.Fields[0].Name)