From ddc1fd92271f76f10bed69b09d28b46b0d003b59 Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Tue, 16 Aug 2022 17:38:20 +0200 Subject: [PATCH] feat: support new aggregate logic introduced in Substrait 0.9.0 (#39) BREAKING CHANGE: the grouping set index column for aggregates is now no longer generated if there are less than two grouping sets, as per the changes made in Substrait 0.9.0 --- README.md | 13 +++--- rs/src/parse/relations/aggregate.rs | 40 +++++++------------ rs/src/resources/substrait-version | 2 +- substrait | 2 +- .../aggregate/measure-and-group.yaml | 2 +- tests/tests/relations/aggregate/measure.yaml | 2 +- .../aggregate/single-set-one-expr.yaml | 2 +- .../aggregate/single-set-two-expr.yaml | 2 +- tests/tests/tpc-h/tpc-h03.yaml | 2 +- tests/tests/tpc-h/tpc-h10.yaml | 14 +++---- tests/tests/tpc-h/tpc-h14.yaml | 2 +- 11 files changed, 36 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index e9cd05c4..df107990 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,13 @@ Substrait versions. Whenever Substrait makes a breaking change that affects validation, the validator will be updated accordingly and drop support for the older version. Refer to the table below for the version compatibility matrix. -| Substrait... | ... is supported by validator ... | -| ------------- | ------------------------------------ | -| 0.7.x - 0.8.x | current version | -| 0.5.x - 0.6.x | 0.0.6 | -| 0.3.x - 0.4.x | 0.0.4 - 0.0.5 | -| older | try 0.0.1, but your mileage may vary | +| Substrait... | ... is supported by validator ... | +| -------------- | ------------------------------------ | +| 0.9.x - 0.10.x | current version | +| 0.7.x - 0.8.x | 0.0.7 | +| 0.5.x - 0.6.x | 0.0.6 | +| 0.3.x - 0.4.x | 0.0.4 - 0.0.5 | +| older | try 0.0.1, but your mileage may vary | As Substrait and the validator stabilize and breaking changes become less frequent, the intention is to support more versions within a single validator diff --git a/rs/src/parse/relations/aggregate.rs b/rs/src/parse/relations/aggregate.rs index 58b0c18e..0682d535 100644 --- a/rs/src/parse/relations/aggregate.rs +++ b/rs/src/parse/relations/aggregate.rs @@ -181,15 +181,15 @@ pub fn parse_aggregate_rel( ); } - // Add the column for the grouping set index. - // FIXME: this field makes no sense for aggregate relations that only have - // measures. It's also disputable whether it should exist when there is - // only one grouping set. - fields.push(Field { - expression: expressions::Expression::Function(String::from("group_index"), vec![]), - data_type: data_type::DataType::new_integer(false), - field_type: FieldType::GroupingSetIndex, - }); + // Add the column for the grouping set index if there is more than one + // grouping set. + if sets.len() > 1 { + fields.push(Field { + expression: expressions::Expression::Function(String::from("group_index"), vec![]), + data_type: data_type::DataType::new_integer(false), + field_type: FieldType::GroupingSetIndex, + }); + } let fields = fields; // Derive schema. @@ -260,23 +260,11 @@ pub fn parse_aggregate_rel( } } FieldType::GroupingSetIndex => { - if x.groupings.is_empty() { - format!( - "Field {index}: undefined value, reserved for grouping \ - set index." - ) - } else if x.groupings.len() == 1 { - format!( - "Field {index}: always zero, representing the index of the \ - matched grouping set (of which there is only one here)." - ) - } else { - format!( - "Field {index}: integer between 0 and {} inclusive, \ - representing the index of the matched grouping set.", - x.groupings.len() - 1 - ) - } + format!( + "Field {index}: integer between 0 and {} inclusive, \ + representing the index of the matched grouping set.", + x.groupings.len() - 1 + ) } }); } diff --git a/rs/src/resources/substrait-version b/rs/src/resources/substrait-version index 8adc70fd..2774f858 100644 --- a/rs/src/resources/substrait-version +++ b/rs/src/resources/substrait-version @@ -1 +1 @@ -0.8.0 \ No newline at end of file +0.10.0 \ No newline at end of file diff --git a/substrait b/substrait index b8fb06a5..4e701455 160000 --- a/substrait +++ b/substrait @@ -1 +1 @@ -Subproject commit b8fb06a52397463bfe9cffc2c89fe71eba56b2ca +Subproject commit 4e70145508133988967db1f3dc96a45ce555867f diff --git a/tests/tests/relations/aggregate/measure-and-group.yaml b/tests/tests/relations/aggregate/measure-and-group.yaml index cd2acdff..019f1099 100644 --- a/tests/tests/relations/aggregate/measure-and-group.yaml +++ b/tests/tests/relations/aggregate/measure-and-group.yaml @@ -42,4 +42,4 @@ plan: - measure: functionReference: 1 output_type: { i64: { nullability: NULLABILITY_REQUIRED } } - __test: [ type: "STRUCT" ] + __test: [ type: "STRUCT" ] diff --git a/tests/tests/relations/aggregate/measure.yaml b/tests/tests/relations/aggregate/measure.yaml index 9dc2dc35..c66b5bf6 100644 --- a/tests/tests/relations/aggregate/measure.yaml +++ b/tests/tests/relations/aggregate/measure.yaml @@ -37,4 +37,4 @@ plan: - measure: functionReference: 1 output_type: { i64: { nullability: NULLABILITY_REQUIRED } } - __test: [ type: "STRUCT" ] + __test: [ type: "STRUCT" ] diff --git a/tests/tests/relations/aggregate/single-set-one-expr.yaml b/tests/tests/relations/aggregate/single-set-one-expr.yaml index aee213da..38430468 100644 --- a/tests/tests/relations/aggregate/single-set-one-expr.yaml +++ b/tests/tests/relations/aggregate/single-set-one-expr.yaml @@ -21,4 +21,4 @@ plan: - selection: rootReference: {} directReference: { structField: { field: 0 } } - __test: [ type: "STRUCT" ] + __test: [ type: "STRUCT" ] diff --git a/tests/tests/relations/aggregate/single-set-two-expr.yaml b/tests/tests/relations/aggregate/single-set-two-expr.yaml index 9b3d8a4a..9c9ad44b 100644 --- a/tests/tests/relations/aggregate/single-set-two-expr.yaml +++ b/tests/tests/relations/aggregate/single-set-two-expr.yaml @@ -24,4 +24,4 @@ plan: - selection: rootReference: {} directReference: { structField: { field: 0 } } - __test: [ type: "STRUCT" ] + __test: [ type: "STRUCT" ] diff --git a/tests/tests/tpc-h/tpc-h03.yaml b/tests/tests/tpc-h/tpc-h03.yaml index 5f39abd1..8e889504 100644 --- a/tests/tests/tpc-h/tpc-h03.yaml +++ b/tests/tests/tpc-h/tpc-h03.yaml @@ -75,7 +75,7 @@ plan: relations: - root: __test: - - type: "NSTRUCT, O_SHIPPRIORITY: date?>" + - type: "NSTRUCT, O_ORDERDATE: date?, O_SHIPPRIORITY: i32?>" input: fetch: common: diff --git a/tests/tests/tpc-h/tpc-h10.yaml b/tests/tests/tpc-h/tpc-h10.yaml index 4231b33c..efff5a2d 100644 --- a/tests/tests/tpc-h/tpc-h10.yaml +++ b/tests/tests/tpc-h/tpc-h10.yaml @@ -87,14 +87,14 @@ plan: __test: - type: "\ NSTRUCT<\ - C_CUSTKEY: i32, \ - C_NAME: i64, \ - REVENUE: VARCHAR?<25>, \ + C_CUSTKEY: i64, \ + C_NAME: VARCHAR?<25>, \ + REVENUE: DECIMAL?<19, 0>, \ C_ACCTBAL: DECIMAL?<19, 0>, \ - N_NAME: DECIMAL?<19, 0>, \ - C_ADDRESS: FIXEDCHAR?<25>, \ - C_PHONE: VARCHAR?<40>, \ - C_COMMENT: FIXEDCHAR?<15>\ + N_NAME: FIXEDCHAR?<25>, \ + C_ADDRESS: VARCHAR?<40>, \ + C_PHONE: FIXEDCHAR?<15>, \ + C_COMMENT: VARCHAR?<117>\ >" input: fetch: diff --git a/tests/tests/tpc-h/tpc-h14.yaml b/tests/tests/tpc-h/tpc-h14.yaml index f829f860..fdec4745 100644 --- a/tests/tests/tpc-h/tpc-h14.yaml +++ b/tests/tests/tpc-h/tpc-h14.yaml @@ -76,7 +76,7 @@ plan: relations: - root: __test: - - type: "NSTRUCT" + - type: "NSTRUCT>" input: project: common: