From 9cacfdeb065e04df750411ec70fe6bf3ad24550c Mon Sep 17 00:00:00 2001 From: Riley McDowell Date: Wed, 7 Aug 2024 16:25:43 -0500 Subject: [PATCH] TRINO: support FILTER after WITHIN GROUP agg expression --- src/sqlfluff/dialects/dialect_trino.py | 6 + .../dialects/trino/filter_aggregate.sql | 11 ++ .../dialects/trino/filter_aggregate.yml | 123 ++++++++++++++++ test/fixtures/dialects/trino/within_group.sql | 13 ++ test/fixtures/dialects/trino/within_group.yml | 139 +++++++++++++++++- 5 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 test/fixtures/dialects/trino/filter_aggregate.sql create mode 100644 test/fixtures/dialects/trino/filter_aggregate.yml diff --git a/src/sqlfluff/dialects/dialect_trino.py b/src/sqlfluff/dialects/dialect_trino.py index 72c4a3c3b57..4a8fa3ff1b9 100644 --- a/src/sqlfluff/dialects/dialect_trino.py +++ b/src/sqlfluff/dialects/dialect_trino.py @@ -396,6 +396,11 @@ class WithinGroupClauseSegment(BaseSegment): """An WITHIN GROUP clause for window functions. https://trino.io/docs/current/functions/aggregate.html#array_agg + + Trino supports an optional FILTER during aggregation that comes + immediately after the WITHIN GROUP clause. + + https://trino.io/docs/current/functions/aggregate.html#filtering-during-aggregation """ type = "withingroup_clause" @@ -403,6 +408,7 @@ class WithinGroupClauseSegment(BaseSegment): "WITHIN", "GROUP", Bracketed(Ref("OrderByClauseSegment", optional=False)), + Ref("FilterClauseGrammar", optional=True), ) diff --git a/test/fixtures/dialects/trino/filter_aggregate.sql b/test/fixtures/dialects/trino/filter_aggregate.sql new file mode 100644 index 00000000000..1a5f255788d --- /dev/null +++ b/test/fixtures/dialects/trino/filter_aggregate.sql @@ -0,0 +1,11 @@ +SELECT id, + COUNT(*) FILTER (WHERE o IS NOT NULL) AS count +FROM (VALUES + (100, 2, 'a'), + (100, 1, 'b'), + (200, NULL, 'c'), + (200, 2, 'a'), + (300, NULL, 'b'), + (300, NULL, 'c') +) t(id, o, value) +GROUP BY id; diff --git a/test/fixtures/dialects/trino/filter_aggregate.yml b/test/fixtures/dialects/trino/filter_aggregate.yml new file mode 100644 index 00000000000..52b9f77d4e6 --- /dev/null +++ b/test/fixtures/dialects/trino/filter_aggregate.yml @@ -0,0 +1,123 @@ +# YML test files are auto-generated from SQL files and should not be edited by +# hand. To help enforce this, the "hash" field in the file must match a hash +# computed by SQLFluff when running the tests. Please run +# `python test/generate_parse_fixture_yml.py` to generate them after adding or +# altering SQL files. +_hash: 464b6edb476473531ba7dc5e1453ce36ca8b444d588ad3d41543bc754fb6faee +file: + statement: + select_statement: + select_clause: + - keyword: SELECT + - select_clause_element: + column_reference: + naked_identifier: id + - comma: ',' + - select_clause_element: + function: + - function_name: + function_name_identifier: COUNT + - bracketed: + start_bracket: ( + star: '*' + end_bracket: ) + - keyword: FILTER + - bracketed: + start_bracket: ( + keyword: WHERE + expression: + - column_reference: + naked_identifier: o + - keyword: IS + - keyword: NOT + - null_literal: 'NULL' + end_bracket: ) + alias_expression: + keyword: AS + naked_identifier: count + from_clause: + keyword: FROM + from_expression: + from_expression_element: + bracketed: + start_bracket: ( + table_expression: + values_clause: + - keyword: VALUES + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '100' + - comma: ',' + - numeric_literal: '2' + - comma: ',' + - quoted_literal: "'a'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '100' + - comma: ',' + - numeric_literal: '1' + - comma: ',' + - quoted_literal: "'b'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '200' + - comma: ',' + - null_literal: 'NULL' + - comma: ',' + - quoted_literal: "'c'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '200' + - comma: ',' + - numeric_literal: '2' + - comma: ',' + - quoted_literal: "'a'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '300' + - comma: ',' + - null_literal: 'NULL' + - comma: ',' + - quoted_literal: "'b'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '300' + - comma: ',' + - null_literal: 'NULL' + - comma: ',' + - quoted_literal: "'c'" + - end_bracket: ) + end_bracket: ) + alias_expression: + naked_identifier: t + bracketed: + start_bracket: ( + identifier_list: + - naked_identifier: id + - comma: ',' + - naked_identifier: o + - comma: ',' + - naked_identifier: value + end_bracket: ) + groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + naked_identifier: id + statement_terminator: ; diff --git a/test/fixtures/dialects/trino/within_group.sql b/test/fixtures/dialects/trino/within_group.sql index fae5287b0a4..689e791dedb 100644 --- a/test/fixtures/dialects/trino/within_group.sql +++ b/test/fixtures/dialects/trino/within_group.sql @@ -17,3 +17,16 @@ FROM (VALUES ) t(id, o, value) GROUP BY id ORDER BY id; + +-- Handle a WITHIN GROUP followed by a FILTER +SELECT id, LISTAGG(value, ',') WITHIN GROUP (ORDER BY o) FILTER (WHERE o IS NOT NULL) AS csv_value +FROM (VALUES + (100, 2, 'a'), + (100, 1, 'b'), + (200, NULL, 'c'), + (200, 2, 'a'), + (300, NULL, 'b'), + (300, 1, 'c') +) t(id, o, value) +GROUP BY id +ORDER BY id; diff --git a/test/fixtures/dialects/trino/within_group.yml b/test/fixtures/dialects/trino/within_group.yml index d72167cc657..12ab1964f87 100644 --- a/test/fixtures/dialects/trino/within_group.yml +++ b/test/fixtures/dialects/trino/within_group.yml @@ -3,7 +3,7 @@ # computed by SQLFluff when running the tests. Please run # `python test/generate_parse_fixture_yml.py` to generate them after adding or # altering SQL files. -_hash: cd402a357356eace79b2dc1ae0f7d1915e1b42f0bb860dd9c483b51ea4ba5822 +_hash: 016faab7c8653d99929df73b008bfdf51c5bc3609a6ce6e3b79843e36092f735 file: - statement: select_statement: @@ -277,3 +277,140 @@ file: - column_reference: naked_identifier: id - statement_terminator: ; +- statement: + select_statement: + select_clause: + - keyword: SELECT + - select_clause_element: + column_reference: + naked_identifier: id + - comma: ',' + - select_clause_element: + function: + function_name: + function_name_identifier: LISTAGG + bracketed: + - start_bracket: ( + - expression: + column_reference: + naked_identifier: value + - comma: ',' + - expression: + quoted_literal: "','" + - end_bracket: ) + withingroup_clause: + - keyword: WITHIN + - keyword: GROUP + - bracketed: + start_bracket: ( + orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + naked_identifier: o + end_bracket: ) + - keyword: FILTER + - bracketed: + start_bracket: ( + keyword: WHERE + expression: + - column_reference: + naked_identifier: o + - keyword: IS + - keyword: NOT + - null_literal: 'NULL' + end_bracket: ) + alias_expression: + keyword: AS + naked_identifier: csv_value + from_clause: + keyword: FROM + from_expression: + from_expression_element: + bracketed: + start_bracket: ( + table_expression: + values_clause: + - keyword: VALUES + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '100' + - comma: ',' + - numeric_literal: '2' + - comma: ',' + - quoted_literal: "'a'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '100' + - comma: ',' + - numeric_literal: '1' + - comma: ',' + - quoted_literal: "'b'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '200' + - comma: ',' + - null_literal: 'NULL' + - comma: ',' + - quoted_literal: "'c'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '200' + - comma: ',' + - numeric_literal: '2' + - comma: ',' + - quoted_literal: "'a'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '300' + - comma: ',' + - null_literal: 'NULL' + - comma: ',' + - quoted_literal: "'b'" + - end_bracket: ) + - comma: ',' + - expression: + bracketed: + - start_bracket: ( + - numeric_literal: '300' + - comma: ',' + - numeric_literal: '1' + - comma: ',' + - quoted_literal: "'c'" + - end_bracket: ) + end_bracket: ) + alias_expression: + naked_identifier: t + bracketed: + start_bracket: ( + identifier_list: + - naked_identifier: id + - comma: ',' + - naked_identifier: o + - comma: ',' + - naked_identifier: value + end_bracket: ) + groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + naked_identifier: id + orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + naked_identifier: id +- statement_terminator: ;