Skip to content

Commit

Permalink
Include the row group count in describe output
Browse files: browse the repository at this point in the history
  • Loading branch information
tschaub committed Oct 4, 2023
1 parent a387850 commit 24bb58a
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 6 deletions.
15 changes: 9 additions & 6 deletions cmd/gpq/command/describe.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,9 @@ func (c *DescribeCmd) Run() error {
fileMetadata := fileReader.MetaData()

info := &DescribeInfo{
Schema: buildSchema(fileReader, "", fileMetadata.Schema.Root()),
NumRows: fileMetadata.NumRows,
Schema: buildSchema(fileReader, "", fileMetadata.Schema.Root()),
NumRows: fileMetadata.NumRows,
NumRowGroups: int64(len(fileMetadata.RowGroups)),
}

metadata, geoErr := geoparquet.GetMetadata(fileMetadata.KeyValueMetadata())
Expand Down Expand Up @@ -213,6 +214,7 @@ func (c *DescribeCmd) formatText(info *DescribeInfo) error {

footerConfig := table.RowConfig{AutoMerge: true, AutoMergeAlign: text.AlignLeft}
tbl.AppendFooter(makeFooter("Rows", info.NumRows, header), footerConfig)
tbl.AppendFooter(makeFooter("Row Groups", info.NumRowGroups, header), footerConfig)
if metadata != nil {
version := metadata.Version
if version == "" {
Expand Down Expand Up @@ -257,10 +259,11 @@ func (c *DescribeCmd) formatJSON(info *DescribeInfo) error {
}

// DescribeInfo holds the results reported by the describe command. Each field
// is serialized under the JSON key given in its struct tag.
//
// NOTE(review): this listing comes from a rendered diff, so the first four
// field lines appear to be the pre-change fields, repeated below with
// NumRowGroups added — confirm against the actual file.
type DescribeInfo struct {
Schema *DescribeSchema `json:"schema"`
Metadata *geoparquet.Metadata `json:"metadata"`
NumRows int64 `json:"rows"`
Issues []string `json:"issues"`
Schema *DescribeSchema `json:"schema"`
Metadata *geoparquet.Metadata `json:"metadata"`
NumRows int64 `json:"rows"`
NumRowGroups int64 `json:"groups"`
Issues []string `json:"issues"`
}

type DescribeSchema struct {
Expand Down
30 changes: 30 additions & 0 deletions cmd/gpq/command/describe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package command_test
import (
"encoding/json"

"github.com/apache/arrow/go/v14/parquet"
"github.com/planetlabs/gpq/cmd/gpq/command"
"github.com/planetlabs/gpq/internal/test"
)
Expand All @@ -21,6 +22,7 @@ func (s *Suite) TestDescribe() {
s.Require().NoError(err)

s.Equal(int64(5), info.NumRows)
s.Equal(int64(1), info.NumRowGroups)
s.Require().Len(info.Schema.Fields, 6)

s.Equal("geometry", info.Schema.Fields[0].Name)
Expand Down Expand Up @@ -59,6 +61,33 @@ func (s *Suite) TestDescribe() {
s.Len(info.Issues, 0)
}

// TestDescribeNumRowGroups verifies that the JSON output of the describe
// command reports the row group count. Eight rows are written with a maximum
// row group length of two, so four row groups are expected.
func (s *Suite) TestDescribeNumRowGroups() {
	writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(2))
	input := test.ParquetFromJSON(s.T(), `[
{"num": 0},
{"num": 1},
{"num": 2},
{"num": 3},
{"num": 4},
{"num": 5},
{"num": 6},
{"num": 7}
]`, writerProps)
	s.writeStdin(input)

	describe := &command.DescribeCmd{Format: "json"}
	runErr := describe.Run()
	s.Require().NoError(runErr)

	// Decode the command's stdout back into the exported result type.
	raw := s.readStdout()
	var described command.DescribeInfo
	decodeErr := json.Unmarshal(raw, &described)
	s.Require().NoError(decodeErr)

	s.Equal(int64(8), described.NumRows)
	s.Equal(int64(4), described.NumRowGroups)
}

func (s *Suite) TestDescribeFromStdin() {
s.writeStdin(test.GeoParquetFromJSON(s.T(), `{
"type": "FeatureCollection",
Expand Down Expand Up @@ -88,6 +117,7 @@ func (s *Suite) TestDescribeFromStdin() {
s.Require().NoError(err)

s.Equal(int64(1), info.NumRows)
s.Equal(int64(1), info.NumRowGroups)
s.Require().Len(info.Schema.Fields, 2)

s.Equal("geometry", info.Schema.Fields[0].Name)
Expand Down

0 comments on commit 24bb58a

Please sign in to comment.