-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support unparsing implicit lateral UNNEST
plan to SQL text
#13824
Changes from 2 commits
7c5e3e3
310d4f0
5d5e55b
91ec338
271a537
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -33,7 +33,7 @@ use super::{ | |||
Unparser, | ||||
}; | ||||
use crate::unparser::ast::UnnestRelationBuilder; | ||||
use crate::unparser::utils::unproject_agg_exprs; | ||||
use crate::unparser::utils::{find_unnest_node_until_relation, unproject_agg_exprs}; | ||||
use crate::utils::UNNEST_PLACEHOLDER; | ||||
use datafusion_common::{ | ||||
internal_err, not_impl_err, | ||||
|
@@ -235,9 +235,10 @@ impl Unparser<'_> { | |||
plan: &LogicalPlan, | ||||
relation: &mut RelationBuilder, | ||||
alias: Option<ast::TableAlias>, | ||||
lateral: bool, | ||||
) -> Result<()> { | ||||
let mut derived_builder = DerivedRelationBuilder::default(); | ||||
derived_builder.lateral(false).alias(alias).subquery({ | ||||
derived_builder.lateral(lateral).alias(alias).subquery({ | ||||
let inner_statement = self.plan_to_sql(plan)?; | ||||
if let ast::Statement::Query(inner_query) = inner_statement { | ||||
inner_query | ||||
|
@@ -257,15 +258,17 @@ impl Unparser<'_> { | |||
alias: &str, | ||||
plan: &LogicalPlan, | ||||
relation: &mut RelationBuilder, | ||||
lateral: bool, | ||||
) -> Result<()> { | ||||
if self.dialect.requires_derived_table_alias() { | ||||
self.derive( | ||||
plan, | ||||
relation, | ||||
Some(self.new_table_alias(alias.to_string(), vec![])), | ||||
lateral, | ||||
) | ||||
} else { | ||||
self.derive(plan, relation, None) | ||||
self.derive(plan, relation, None, lateral) | ||||
} | ||||
} | ||||
|
||||
|
@@ -317,10 +320,12 @@ impl Unparser<'_> { | |||
// Projection can be top-level plan for unnest relation | ||||
// The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have | ||||
// only one expression, which is the placeholder column generated by the rewriter. | ||||
if self.dialect.unnest_as_table_factor() | ||||
&& p.expr.len() == 1 | ||||
&& Self::is_unnest_placeholder(&p.expr[0]) | ||||
{ | ||||
let (is_unnest, is_lateral) = if p.expr.len() == 1 { | ||||
Self::is_unnest_placeholder_with_outer_ref(&p.expr[0]) | ||||
} else { | ||||
(false, false) | ||||
}; | ||||
if self.dialect.unnest_as_table_factor() && is_unnest { | ||||
if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() { | ||||
return self | ||||
.unnest_to_table_factor_sql(unnest, query, select, relation); | ||||
|
@@ -333,6 +338,7 @@ impl Unparser<'_> { | |||
"derived_projection", | ||||
plan, | ||||
relation, | ||||
is_lateral, | ||||
); | ||||
} | ||||
self.reconstruct_select_statement(plan, p, select)?; | ||||
|
@@ -365,6 +371,7 @@ impl Unparser<'_> { | |||
"derived_limit", | ||||
plan, | ||||
relation, | ||||
false, | ||||
); | ||||
} | ||||
if let Some(fetch) = &limit.fetch { | ||||
|
@@ -402,6 +409,7 @@ impl Unparser<'_> { | |||
"derived_sort", | ||||
plan, | ||||
relation, | ||||
false, | ||||
); | ||||
} | ||||
let Some(query_ref) = query else { | ||||
|
@@ -472,6 +480,7 @@ impl Unparser<'_> { | |||
"derived_distinct", | ||||
plan, | ||||
relation, | ||||
false, | ||||
); | ||||
} | ||||
let (select_distinct, input) = match distinct { | ||||
|
@@ -658,6 +667,7 @@ impl Unparser<'_> { | |||
"derived_union", | ||||
plan, | ||||
relation, | ||||
false, | ||||
); | ||||
} | ||||
|
||||
|
@@ -723,19 +733,48 @@ impl Unparser<'_> { | |||
internal_err!("Unnest input is not a Projection: {unnest:?}") | ||||
} | ||||
} | ||||
_ => not_impl_err!("Unsupported operator: {plan:?}"), | ||||
LogicalPlan::Subquery(subquery) | ||||
if find_unnest_node_until_relation(subquery.subquery.as_ref()) | ||||
.is_some() => | ||||
{ | ||||
if self.dialect.unnest_as_table_factor() { | ||||
self.select_to_sql_recursively( | ||||
subquery.subquery.as_ref(), | ||||
query, | ||||
select, | ||||
relation, | ||||
) | ||||
} else { | ||||
self.derive_with_dialect_alias( | ||||
"derived_unnest", | ||||
subquery.subquery.as_ref(), | ||||
relation, | ||||
true, | ||||
) | ||||
} | ||||
} | ||||
_ => { | ||||
not_impl_err!("Unsupported operator: {plan:?}") | ||||
} | ||||
} | ||||
} | ||||
|
||||
/// Try to find the placeholder column name generated by `RecursiveUnnestRewriter` | ||||
/// Only match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` | ||||
fn is_unnest_placeholder(expr: &Expr) -> bool { | ||||
/// The first return value is a boolean indicating if the column is a placeholder column: | ||||
/// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` | ||||
/// The second return value is a boolean indicating if the column uses an outer reference: | ||||
/// Try to match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")` | ||||
/// | ||||
/// `outer_ref` is the display result of [Expr::OuterReferenceColumn] | ||||
fn is_unnest_placeholder_with_outer_ref(expr: &Expr) -> (bool, bool) { | ||||
if let Expr::Alias(Alias { expr, .. }) = expr { | ||||
if let Expr::Column(Column { name, .. }) = expr.as_ref() { | ||||
return name.starts_with(UNNEST_PLACEHOLDER); | ||||
if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) { | ||||
return (true, prefix.starts_with("(outer_ref(")); | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Not a huge fan of this string matching. At least the UNNEST_PLACEHOLDER is a shared const, but the […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I don't feel very strongly about this since it does seem unlikely to change though.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I agree that string matching isn't a good approach, but I think it's currently the only way to recognize whether an outer reference column exists in the unnest plan. 🤔
Maybe we can use something like datafusion/datafusion/expr/src/expr.rs Line 2546 in ede665b
|
||||
} | ||||
} | ||||
} | ||||
false | ||||
(false, false) | ||||
} | ||||
|
||||
fn unnest_to_table_factor_sql( | ||||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -190,10 +190,11 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( | |||
} | ||||
} | ||||
|
||||
/// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of | ||||
/// subquery | ||||
/// This logic is to work out the columns and inner query for SubqueryAlias plan for some types of | ||||
/// subquery or unnest | ||||
/// - `(SELECT column_a as a from table) AS A` | ||||
/// - `(SELECT column_a from table) AS A (a)` | ||||
/// - `SELECT * FROM t1 CROSS JOIN UNNEST(t1.c1) AS u(c1)` (see [find_unnest_column_alias]) | ||||
/// | ||||
/// A roundtrip example for table alias with columns | ||||
/// | ||||
|
@@ -222,6 +223,15 @@ pub(super) fn subquery_alias_inner_query_and_columns( | |||
) -> (&LogicalPlan, Vec<Ident>) { | ||||
let plan: &LogicalPlan = subquery_alias.input.as_ref(); | ||||
|
||||
if let LogicalPlan::Subquery(subquery) = plan { | ||||
let (inner_projection, Some(column)) = | ||||
find_unnest_column_alias(subquery.subquery.as_ref()) | ||||
else { | ||||
return (plan, vec![]); | ||||
}; | ||||
return (inner_projection, vec![Ident::new(column)]); | ||||
} | ||||
|
||||
let LogicalPlan::Projection(outer_projections) = plan else { | ||||
return (plan, vec![]); | ||||
}; | ||||
|
@@ -257,6 +267,43 @@ pub(super) fn subquery_alias_inner_query_and_columns( | |||
(outer_projections.input.as_ref(), columns) | ||||
} | ||||
|
||||
/// Try to find the column alias for UNNEST in the inner projection. | ||||
/// For example: | ||||
/// ```sql | ||||
/// SELECT * FROM t1 CROSS JOIN UNNEST(t1.c1) AS u(c1) | ||||
/// ``` | ||||
/// The above query will be parsed into the following plan: | ||||
/// ```text | ||||
/// Projection: * | ||||
/// Cross Join: | ||||
/// SubqueryAlias: t1 | ||||
/// TableScan: t | ||||
/// SubqueryAlias: u | ||||
/// Subquery: | ||||
/// Projection: UNNEST(outer_ref(t1.c1)) AS c1 | ||||
/// Projection: __unnest_placeholder(outer_ref(t1.c1),depth=1) AS UNNEST(outer_ref(t1.c1)) | ||||
/// Unnest: lists[__unnest_placeholder(outer_ref(t1.c1))|depth=1] structs[] | ||||
/// Projection: outer_ref(t1.c1) AS __unnest_placeholder(outer_ref(t1.c1)) | ||||
/// EmptyRelation | ||||
/// ``` | ||||
/// The function will return the inner projection and the column alias `c1` if the column name | ||||
/// starts with `UNNEST(` (the `Display` result of [Expr::Unnest]) in the inner projection. | ||||
pub(super) fn find_unnest_column_alias( | ||||
plan: &LogicalPlan, | ||||
) -> (&LogicalPlan, Option<String>) { | ||||
if let LogicalPlan::Projection(projection) = plan { | ||||
if projection.expr.len() != 1 { | ||||
return (plan, None); | ||||
} | ||||
if let Some(Expr::Alias(alias)) = projection.expr.first() { | ||||
if alias.expr.schema_name().to_string().starts_with("UNNEST(") { | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Is this not an expression of type […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
No, it's not an […]
It's a column pointing to an alias of its child. The alias is built from an expression datafusion/datafusion/sql/src/utils.rs Line 400 in ede665b
I think there is a similar issue for #13824 (comment). |
||||
return (projection.input.as_ref(), Some(alias.name.clone())); | ||||
} | ||||
} | ||||
} | ||||
(plan, None) | ||||
} | ||||
|
||||
/// Injects column aliases into a subquery's logical plan. The function searches for a `Projection` | ||||
/// within the given plan, which may be wrapped by other operators (e.g., LIMIT, SORT). | ||||
/// If the top-level plan is a `Projection`, it directly injects the column aliases. | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we define an enum that we return here? That is clearer to read than two booleans and we can limit it to reference only the possible states (i.e. it doesn't look like
(false, true)
can happen, but it's something a caller would need to handle from the type system)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
enum
sounds like a good idea. I will try it. Thanks 👍