diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index fbe7d5c04b9bf..eb54f55f3ad14 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1571,6 +1571,7 @@ dependencies = [ "arrow-schema", "datafusion-common", "datafusion-expr", + "itertools", "log", "regex", "sqlparser", diff --git a/datafusion-examples/examples/analyzer_rule.rs b/datafusion-examples/examples/analyzer_rule.rs index bd067be97b8b3..bd835ac95c653 100644 --- a/datafusion-examples/examples/analyzer_rule.rs +++ b/datafusion-examples/examples/analyzer_rule.rs @@ -21,7 +21,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::Result; use datafusion_expr::{col, lit, Expr, LogicalPlan, LogicalPlanBuilder}; -use datafusion_optimizer::analyzer::AnalyzerRule; +use datafusion_sql::analyzer::AnalyzerRule; use std::sync::{Arc, Mutex}; /// This example demonstrates how to add your own [`AnalyzerRule`] to diff --git a/datafusion-examples/examples/sql_frontend.rs b/datafusion-examples/examples/sql_frontend.rs index 839ee95eb1816..2f8d56a66acd6 100644 --- a/datafusion-examples/examples/sql_frontend.rs +++ b/datafusion-examples/examples/sql_frontend.rs @@ -22,9 +22,8 @@ use datafusion_expr::{ AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableProviderFilterPushDown, TableSource, WindowUDF, }; -use datafusion_optimizer::{ - Analyzer, AnalyzerRule, Optimizer, OptimizerConfig, OptimizerContext, OptimizerRule, -}; +use datafusion_optimizer::{Optimizer, OptimizerConfig, OptimizerContext, OptimizerRule}; +use datafusion_sql::analyzer::{Analyzer, AnalyzerRule}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::sqlparser::dialect::PostgreSqlDialect; use datafusion_sql::sqlparser::parser::Parser; diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 1ffe54e4b06c1..6b139d01dcbf0 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -19,6 +19,7 @@ use std::{any::Any, borrow::Cow, sync::Arc}; +use crate::datasource::{TableProvider, TableType}; use crate::{ error::Result, logical_expr::{Expr, LogicalPlan}, @@ -30,10 +31,8 @@ use datafusion_catalog::Session; use datafusion_common::config::ConfigOptions; use datafusion_common::Column; use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; -use datafusion_optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule; -use datafusion_optimizer::Analyzer; - -use crate::datasource::{TableProvider, TableType}; +use datafusion_sql::analyzer::expand_wildcard_rule::ExpandWildcardRule; +use datafusion_sql::analyzer::Analyzer; /// An implementation of `TableProvider` that uses another logical plan. #[derive(Debug)] diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index b0951d9ec44cd..09e9211b4e793 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -77,7 +77,8 @@ use datafusion_catalog::{DynamicFileCatalog, SessionStore, UrlTableFactory}; pub use datafusion_execution::config::SessionConfig; pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; -use datafusion_optimizer::{AnalyzerRule, OptimizerRule}; +use datafusion_optimizer::OptimizerRule; +use datafusion_sql::analyzer::AnalyzerRule; use object_store::ObjectStore; use parking_lot::RwLock; use url::Url; diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 38c54e3523672..6da912715883b 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -54,13 +54,12 @@ use datafusion_expr::var_provider::{is_system_variables, VarType}; use datafusion_expr::{ AggregateUDF, Explain, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF, }; -use datafusion_optimizer::{ - Analyzer, AnalyzerRule, Optimizer, OptimizerConfig, OptimizerRule, -}; +use datafusion_optimizer::{Optimizer, OptimizerConfig, OptimizerRule}; use datafusion_physical_expr::create_physical_expr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::ExecutionPlan; +use datafusion_sql::analyzer::{Analyzer, AnalyzerRule}; use datafusion_sql::parser::{DFParser, Statement}; use datafusion_sql::planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel}; use itertools::Itertools; diff --git a/datafusion/core/tests/optimizer/mod.rs b/datafusion/core/tests/optimizer/mod.rs index f17d13a420607..08860bdcd05ec 100644 --- a/datafusion/core/tests/optimizer/mod.rs +++ b/datafusion/core/tests/optimizer/mod.rs @@ -33,7 +33,6 @@ use datafusion_expr::{ ScalarUDF, TableSource, WindowUDF, }; use datafusion_functions::core::expr_ext::FieldAccessor; -use datafusion_optimizer::analyzer::Analyzer; use datafusion_optimizer::optimizer::Optimizer; use datafusion_optimizer::simplify_expressions::GuaranteeRewriter; use datafusion_optimizer::{OptimizerConfig, OptimizerContext}; @@ -45,6 +44,7 @@ use datafusion_sql::TableReference; use chrono::DateTime; use datafusion_functions::datetime; +use datafusion_sql::analyzer::Analyzer; #[cfg(test)] #[ctor::ctor] diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index e51adbc4ddc11..c61b0394a2d87 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -100,7 +100,7 @@ use datafusion_common::ScalarValue; use datafusion_expr::tree_node::replace_sort_expression; use datafusion_expr::{Projection, SortExpr}; use datafusion_optimizer::optimizer::ApplyOrder; -use datafusion_optimizer::AnalyzerRule; +use datafusion_sql::analyzer::AnalyzerRule; /// Execute the specified sql and return the resulting record batches /// pretty printed as a String. diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 02a2edb98016d..ca674ae6fb8d7 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -17,7 +17,7 @@ //! Logical Expressions: [`Expr`] -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::{self, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::mem; @@ -1837,6 +1837,23 @@ fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Resu Ok(()) } +pub fn collect_subquery_cols( + exprs: &[Expr], + subquery_schema: &DFSchema, +) -> Result> { + exprs.iter().try_fold(BTreeSet::new(), |mut cols, expr| { + let mut using_cols: Vec = vec![]; + for col in expr.column_refs().into_iter() { + if subquery_schema.has_column(col) { + using_cols.push(col.clone()); + } + } + + cols.extend(using_cols); + Result::<_>::Ok(cols) + }) +} + #[macro_export] macro_rules! expr_vec_fmt { ( $ARRAY:expr ) => {{ diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 7f918c03e3ac3..a847870f64cc1 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -22,13 +22,12 @@ use std::ops::Deref; use std::sync::Arc; use crate::simplify_expressions::ExprSimplifier; -use crate::utils::collect_subquery_cols; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue}; -use datafusion_expr::expr::Alias; +use datafusion_expr::expr::{collect_subquery_cols, Alias}; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction}; use datafusion_expr::{expr, lit, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index 94da08243d78f..7aaeb6fa34c49 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -114,12 +114,12 @@ fn extract_plan_from_distinct(plan: Arc) -> Arc { #[cfg(test)] mod tests { use super::*; - use crate::analyzer::type_coercion::TypeCoercion; - use crate::analyzer::Analyzer; use crate::test::*; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::config::ConfigOptions; use datafusion_expr::{col, logical_plan::table_scan}; + use datafusion_sql::analyzer::type_coercion::TypeCoercion; + use datafusion_sql::analyzer::Analyzer; fn schema() -> Schema { Schema::new(vec![ diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs index 3b1df3510d2a4..4e4cf5e08ba9f 100644 --- a/datafusion/optimizer/src/lib.rs +++ b/datafusion/optimizer/src/lib.rs @@ -30,7 +30,6 @@ //! //! [`LogicalPlan`]: datafusion_expr::LogicalPlan //! [`TypeCoercion`]: analyzer::type_coercion::TypeCoercion -pub mod analyzer; pub mod common_subexpr_eliminate; pub mod decorrelate; pub mod decorrelate_predicate_subquery; @@ -61,7 +60,6 @@ pub mod utils; #[cfg(test)] pub mod test; -pub use analyzer::{Analyzer, AnalyzerRule}; pub use optimizer::{Optimizer, OptimizerConfig, OptimizerContext, OptimizerRule}; #[allow(deprecated)] pub use utils::optimize_children; diff --git a/datafusion/optimizer/src/test/mod.rs b/datafusion/optimizer/src/test/mod.rs index b8dc54e40476f..f1503402f8c04 100644 --- a/datafusion/optimizer/src/test/mod.rs +++ b/datafusion/optimizer/src/test/mod.rs @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. -use crate::analyzer::{Analyzer, AnalyzerRule}; use crate::optimizer::Optimizer; use crate::{OptimizerContext, OptimizerRule}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::config::ConfigOptions; use datafusion_common::{assert_contains, Result}; use datafusion_expr::{col, logical_plan::table_scan, LogicalPlan, LogicalPlanBuilder}; +use datafusion_sql::analyzer::{Analyzer, AnalyzerRule}; use std::sync::Arc; pub mod user_defined; @@ -108,46 +108,6 @@ pub fn get_tpch_table_schema(table: &str) -> Schema { } } -pub fn assert_analyzed_plan_eq( - rule: Arc, - plan: LogicalPlan, - expected: &str, -) -> Result<()> { - let options = ConfigOptions::default(); - assert_analyzed_plan_with_config_eq(options, rule, plan, expected)?; - - Ok(()) -} - -pub fn assert_analyzed_plan_with_config_eq( - options: ConfigOptions, - rule: Arc, - plan: LogicalPlan, - expected: &str, -) -> Result<()> { - let analyzed_plan = - Analyzer::with_rules(vec![rule]).execute_and_check(plan, &options, |_, _| {})?; - let formatted_plan = format!("{analyzed_plan}"); - assert_eq!(formatted_plan, expected); - - Ok(()) -} - - -pub fn assert_analyzed_plan_eq_display_indent( - rule: Arc, - plan: LogicalPlan, - expected: &str, -) -> Result<()> { - let options = ConfigOptions::default(); - let analyzed_plan = - Analyzer::with_rules(vec![rule]).execute_and_check(plan, &options, |_, _| {})?; - let formatted_plan = analyzed_plan.display_indent_schema().to_string(); - assert_eq!(formatted_plan, expected); - - Ok(()) -} - pub fn assert_analyzer_check_err( rules: Vec>, plan: LogicalPlan, diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 6972c16c0ddf8..1d886239b7b62 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -21,7 +21,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::{Column, DFSchema, Result}; +use datafusion_common::{Column, Result}; use datafusion_expr::expr_rewriter::replace_col; use datafusion_expr::{logical_plan::LogicalPlan, Expr}; @@ -80,23 +80,6 @@ pub(crate) fn has_all_column_refs(expr: &Expr, schema_cols: &HashSet) -> == column_refs.len() } -pub(crate) fn collect_subquery_cols( - exprs: &[Expr], - subquery_schema: &DFSchema, -) -> Result> { - exprs.iter().try_fold(BTreeSet::new(), |mut cols, expr| { - let mut using_cols: Vec = vec![]; - for col in expr.column_refs().into_iter() { - if subquery_schema.has_column(col) { - using_cols.push(col.clone()); - } - } - - cols.extend(using_cols); - Result::<_>::Ok(cols) - }) -} - pub(crate) fn replace_qualified_name( expr: Expr, cols: &BTreeSet, diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 470bd947c7fbc..38632d4470285 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -27,9 +27,9 @@ use datafusion_expr::test::function_stub::sum_udaf; use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF}; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::count::count_udaf; -use datafusion_optimizer::analyzer::Analyzer; use datafusion_optimizer::optimizer::Optimizer; use datafusion_optimizer::{OptimizerConfig, OptimizerContext, OptimizerRule}; +use datafusion_sql::analyzer::Analyzer; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::sqlparser::ast::Statement; use datafusion_sql::sqlparser::dialect::GenericDialect; diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 5c4b83fe38e11..e73d04025493c 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -46,6 +46,7 @@ arrow-array = { workspace = true } arrow-schema = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } +itertools = { workspace = true } log = { workspace = true } regex = { workspace = true } sqlparser = { workspace = true } diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/sql/src/analyzer/count_wildcard_rule.rs similarity index 98% rename from datafusion/optimizer/src/analyzer/count_wildcard_rule.rs rename to datafusion/sql/src/analyzer/count_wildcard_rule.rs index 86520b3587cdc..abab032b28483 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/sql/src/analyzer/count_wildcard_rule.rs @@ -17,11 +17,11 @@ use crate::analyzer::AnalyzerRule; -use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::Result; use datafusion_expr::expr::{AggregateFunction, WindowFunction}; +use datafusion_expr::expr_rewriter::NamePreserver; use datafusion_expr::utils::COUNT_STAR_EXPANSION; use datafusion_expr::{lit, Expr, LogicalPlan, WindowFunctionDefinition}; @@ -101,7 +101,6 @@ fn analyze_internal(plan: LogicalPlan) -> Result> { #[cfg(test)] mod tests { use super::*; - use crate::test::*; use arrow::datatypes::DataType; use datafusion_common::ScalarValue; use datafusion_expr::expr::Sort; @@ -114,6 +113,10 @@ mod tests { use datafusion_functions_aggregate::expr_fn::max; use std::sync::Arc; + use crate::test::{ + assert_analyzed_plan_eq_display_indent, test_table_scan, + test_table_scan_with_name, + }; use datafusion_functions_aggregate::expr_fn::{count, sum}; fn assert_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> { diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/sql/src/analyzer/expand_wildcard_rule.rs similarity index 99% rename from datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs rename to datafusion/sql/src/analyzer/expand_wildcard_rule.rs index a26ec4be5c851..b2f2664dd47aa 100644 --- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs +++ b/datafusion/sql/src/analyzer/expand_wildcard_rule.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use crate::AnalyzerRule; +use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult}; use datafusion_common::{Column, Result}; @@ -160,15 +160,14 @@ fn replace_columns( mod tests { use arrow::datatypes::{DataType, Field, Schema}; + use super::*; + use crate::analyzer::Analyzer; use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; - use crate::Analyzer; use datafusion_common::{JoinType, TableReference}; use datafusion_expr::{ col, in_subquery, qualified_wildcard, table_scan, wildcard, LogicalPlanBuilder, }; - use super::*; - fn assert_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> { assert_analyzed_plan_eq_display_indent( Arc::new(ExpandWildcardRule::new()), diff --git a/datafusion/optimizer/src/analyzer/function_rewrite.rs b/datafusion/sql/src/analyzer/function_rewrite.rs similarity index 97% rename from datafusion/optimizer/src/analyzer/function_rewrite.rs rename to datafusion/sql/src/analyzer/function_rewrite.rs index c6bf14ebce2e3..0321b90b11d1e 100644 --- a/datafusion/optimizer/src/analyzer/function_rewrite.rs +++ b/datafusion/sql/src/analyzer/function_rewrite.rs @@ -22,8 +22,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{DFSchema, Result}; -use crate::utils::NamePreserver; -use datafusion_expr::expr_rewriter::FunctionRewrite; +use datafusion_expr::expr_rewriter::{FunctionRewrite, NamePreserver}; use datafusion_expr::utils::merge_schema; use datafusion_expr::LogicalPlan; use std::sync::Arc; diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/sql/src/analyzer/inline_table_scan.rs similarity index 100% rename from datafusion/optimizer/src/analyzer/inline_table_scan.rs rename to datafusion/sql/src/analyzer/inline_table_scan.rs index 342d85a915b4d..404bc3cacad98 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/sql/src/analyzer/inline_table_scan.rs @@ -106,8 +106,8 @@ mod tests { use std::{borrow::Cow, sync::Arc, vec}; use crate::analyzer::inline_table_scan::InlineTableScan; - use crate::test::assert_analyzed_plan_eq; + use crate::test::assert_analyzed_plan_eq; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_expr::{col, lit, Expr, LogicalPlan, LogicalPlanBuilder, TableSource}; diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/sql/src/analyzer/mod.rs similarity index 96% rename from datafusion/optimizer/src/analyzer/mod.rs rename to datafusion/sql/src/analyzer/mod.rs index 4cd891664e7f5..1b45656fe3439 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/sql/src/analyzer/mod.rs @@ -20,7 +20,7 @@ use std::fmt::Debug; use std::sync::Arc; -use log::debug; +use log::{debug, trace}; use datafusion_common::config::ConfigOptions; use datafusion_common::instant::Instant; @@ -36,7 +36,6 @@ use crate::analyzer::expand_wildcard_rule::ExpandWildcardRule; use crate::analyzer::inline_table_scan::InlineTableScan; use crate::analyzer::subquery::check_subquery_expr; use crate::analyzer::type_coercion::TypeCoercion; -use crate::utils::log_plan; use self::function_rewrite::ApplyFunctionRewrites; @@ -191,3 +190,9 @@ fn check_plan(plan: &LogicalPlan) -> Result<()> { }) .map(|_| ()) } + +/// Log the plan in debug/tracing mode after some part of the optimizer runs +fn log_plan(description: &str, plan: &LogicalPlan) { + debug!("{description}:\n{}\n", plan.display_indent()); + trace!("{description}::\n{}\n", plan.display_indent_schema()); +} diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/sql/src/analyzer/subquery.rs similarity index 99% rename from datafusion/optimizer/src/analyzer/subquery.rs rename to datafusion/sql/src/analyzer/subquery.rs index c771f31a58b21..3cee1d9181c26 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/sql/src/analyzer/subquery.rs @@ -18,10 +18,10 @@ use std::ops::Deref; use crate::analyzer::check_plan; -use crate::utils::collect_subquery_cols; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_common::{plan_err, Result}; +use datafusion_expr::expr::collect_subquery_cols; use datafusion_expr::expr_rewriter::strip_outer_reference; use datafusion_expr::utils::split_conjunction; use datafusion_expr::{ diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/sql/src/analyzer/type_coercion.rs similarity index 99% rename from datafusion/optimizer/src/analyzer/type_coercion.rs rename to datafusion/sql/src/analyzer/type_coercion.rs index 4dc34284c7198..3ec92a04b64c2 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/sql/src/analyzer/type_coercion.rs @@ -24,7 +24,6 @@ use itertools::izip; use arrow::datatypes::{DataType, Field, IntervalUnit, Schema}; use crate::analyzer::AnalyzerRule; -use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ @@ -35,7 +34,7 @@ use datafusion_expr::expr::{ self, Alias, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, Sort, WindowFunction, }; -use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; +use datafusion_expr::expr_rewriter::{coerce_plan_expr_for_schema, NamePreserver}; use datafusion_expr::expr_schema::cast_subquery; use datafusion_expr::logical_plan::Subquery; use datafusion_expr::type_coercion::binary::{ diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index 956f5e17e26f5..049cbf9b62070 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -34,6 +34,7 @@ //! [`LogicalPlan`]: datafusion_expr::logical_plan::LogicalPlan //! [`Expr`]: datafusion_expr::expr::Expr +pub mod analyzer; mod cte; mod expr; pub mod parser; @@ -43,6 +44,7 @@ mod relation; mod select; mod set_expr; mod statement; +mod test; #[cfg(feature = "unparser")] pub mod unparser; pub mod utils; diff --git a/datafusion/sql/src/test.rs b/datafusion/sql/src/test.rs new file mode 100644 index 0000000000000..cb98da2ad1cb4 --- /dev/null +++ b/datafusion/sql/src/test.rs @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::analyzer::{Analyzer, AnalyzerRule}; +use arrow_schema::{DataType, Field, Schema}; +use datafusion_common::config::ConfigOptions; +use datafusion_expr::{table_scan, LogicalPlan}; +use std::sync::Arc; + +/// some tests share a common table +pub(crate) fn test_table_scan() -> datafusion_common::Result { + test_table_scan_with_name("test") +} + +/// some tests share a common table with different names +pub(crate) fn test_table_scan_with_name( + name: &str, +) -> datafusion_common::Result { + let schema = Schema::new(test_table_scan_fields()); + table_scan(Some(name), &schema, None)?.build() +} + +pub(crate) fn test_table_scan_fields() -> Vec { + vec![ + Field::new("a", DataType::UInt32, false), + Field::new("b", DataType::UInt32, false), + Field::new("c", DataType::UInt32, false), + ] +} + +pub(crate) fn assert_analyzed_plan_eq( + rule: Arc, + plan: LogicalPlan, + expected: &str, +) -> datafusion_common::Result<()> { + let options = ConfigOptions::default(); + assert_analyzed_plan_with_config_eq(options, rule, plan, expected)?; + + Ok(()) +} + +pub(crate) fn assert_analyzed_plan_with_config_eq( + options: ConfigOptions, + rule: Arc, + plan: LogicalPlan, + expected: &str, +) -> datafusion_common::Result<()> { + let analyzed_plan = + Analyzer::with_rules(vec![rule]).execute_and_check(plan, &options, |_, _| {})?; + let formatted_plan = format!("{analyzed_plan}"); + assert_eq!(formatted_plan, expected); + + Ok(()) +} + +pub(crate) fn assert_analyzed_plan_eq_display_indent( + rule: Arc, + plan: LogicalPlan, + expected: &str, +) -> datafusion_common::Result<()> { + let options = ConfigOptions::default(); + let analyzed_plan = + Analyzer::with_rules(vec![rule]).execute_and_check(plan, &options, |_, _| {})?; + let formatted_plan = analyzed_plan.display_indent_schema().to_string(); + assert_eq!(formatted_plan, expected); + + Ok(()) +}