From 5db274004bc4a7d493aba6764a8521694a67cd11 Mon Sep 17 00:00:00 2001 From: Sergei Grebnov Date: Sun, 27 Oct 2024 08:33:50 -0700 Subject: [PATCH] Improve TableScan with filters pushdown unparsing (multiple filters support) (#13131) --- datafusion/sql/src/unparser/ast.rs | 23 ++++++++++++++++++++++- datafusion/sql/tests/cases/plan_to_sql.rs | 15 +++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 71ff712985cd..2de1ce9125a7 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -182,7 +182,28 @@ impl SelectBuilder { self } pub fn selection(&mut self, value: Option<ast::Expr>) -> &mut Self { - self.selection = value; + // With filter pushdown optimization, the LogicalPlan can have filters defined as part of `TableScan` and `Filter` nodes. + // To avoid overwriting one of the filters, we combine the existing filter with the additional filter. + // Example: | + // | Projection: customer.c_phone AS cntrycode, customer.c_acctbal | + // | Filter: CAST(customer.c_acctbal AS Decimal128(38, 6)) > (<subquery>) | + // | Subquery: + // | .. 
| + // | TableScan: customer, full_filters=[customer.c_mktsegment = Utf8("BUILDING")] + match (&self.selection, value) { + (Some(existing_selection), Some(new_selection)) => { + self.selection = Some(ast::Expr::BinaryOp { + left: Box::new(existing_selection.clone()), + op: ast::BinaryOperator::And, + right: Box::new(new_selection), + }); + } + (None, Some(new_selection)) => { + self.selection = Some(new_selection); + } + (_, None) => (), + } + self } pub fn group_by(&mut self, value: ast::GroupByExpr) -> &mut Self { diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 8e25c1c5b1cd..a58bdf4a31c4 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -968,6 +968,21 @@ fn test_table_scan_pushdown() -> Result<()> { table_scan_with_all.to_string(), "SELECT t1.id, t1.age FROM t1 WHERE (t1.id > t1.age) LIMIT 10" ); + + let table_scan_with_additional_filter = table_scan_with_filters( + Some("t1"), + &schema, + None, + vec![col("id").gt(col("age"))], + )? + .filter(col("id").eq(lit(5)))? + .build()?; + let table_scan_with_filter = plan_to_sql(&table_scan_with_additional_filter)?; + assert_eq!( + table_scan_with_filter.to_string(), + "SELECT * FROM t1 WHERE (t1.id = 5) AND (t1.id > t1.age)" + ); + Ok(()) }