From f300168791b261e1162ac7fab47b329c9e5467f3 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Mon, 1 Apr 2024 23:36:14 +0800 Subject: [PATCH 01/12] fix: detect non-recursive CTEs in the recursive `WITH` clause (#9836) * move cte related logic to its own mod * fix check cte self reference * add tests * fix test * move test to slt --- datafusion/sql/src/cte.rs | 212 +++++++++++++++++++++ datafusion/sql/src/lib.rs | 1 + datafusion/sql/src/planner.rs | 5 + datafusion/sql/src/query.rs | 144 +------------- datafusion/sql/src/set_expr.rs | 81 ++++---- datafusion/sql/tests/sql_integration.rs | 10 - datafusion/sqllogictest/test_files/cte.slt | 88 +++++++++ 7 files changed, 356 insertions(+), 185 deletions(-) create mode 100644 datafusion/sql/src/cte.rs diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs new file mode 100644 index 000000000000..5b1f81e820a2 --- /dev/null +++ b/datafusion/sql/src/cte.rs @@ -0,0 +1,212 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; + +use arrow::datatypes::Schema; +use datafusion_common::{ + not_impl_err, plan_err, + tree_node::{TreeNode, TreeNodeRecursion}, + Result, +}; +use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, TableSource}; +use sqlparser::ast::{Query, SetExpr, SetOperator, With}; + +impl<'a, S: ContextProvider> SqlToRel<'a, S> { + pub(super) fn plan_with_clause( + &self, + with: With, + planner_context: &mut PlannerContext, + ) -> Result<()> { + let is_recursive = with.recursive; + // Process CTEs from top to bottom + for cte in with.cte_tables { + // A `WITH` block can't use the same name more than once + let cte_name = self.normalizer.normalize(cte.alias.name.clone()); + if planner_context.contains_cte(&cte_name) { + return plan_err!( + "WITH query name {cte_name:?} specified more than once" + ); + } + + // Create a logical plan for the CTE + let cte_plan = if is_recursive { + self.recursive_cte(cte_name.clone(), *cte.query, planner_context)? + } else { + self.non_recursive_cte(*cte.query, planner_context)? + }; + + // Each `WITH` block can change the column names in the last + // projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). + let final_plan = self.apply_table_alias(cte_plan, cte.alias)?; + // Export the CTE to the outer query + planner_context.insert_cte(cte_name, final_plan); + } + Ok(()) + } + + fn non_recursive_cte( + &self, + cte_query: Query, + planner_context: &mut PlannerContext, + ) -> Result { + // CTE expr don't need extend outer_query_schema, + // so we clone a new planner_context here. 
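+        // The clone still carries every CTE planned so far in this `WITH`
+        // list (e.g. in `WITH a AS (SELECT 1), b AS (SELECT * FROM a) ...`,
+        // `b` can resolve `a`), while nothing planned inside this CTE leaks
+        // back into the outer planner context.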
+ let mut cte_planner_context = planner_context.clone(); + self.query_to_plan(cte_query, &mut cte_planner_context) + } + + fn recursive_cte( + &self, + cte_name: String, + mut cte_query: Query, + planner_context: &mut PlannerContext, + ) -> Result { + if !self + .context_provider + .options() + .execution + .enable_recursive_ctes + { + return not_impl_err!("Recursive CTEs are not enabled"); + } + + let (left_expr, right_expr, set_quantifier) = match *cte_query.body { + SetExpr::SetOperation { + op: SetOperator::Union, + left, + right, + set_quantifier, + } => (left, right, set_quantifier), + other => { + // If the query is not a UNION, then it is not a recursive CTE + cte_query.body = Box::new(other); + return self.non_recursive_cte(cte_query, planner_context); + } + }; + + // Each recursive CTE consists from two parts in the logical plan: + // 1. A static term (the left hand side on the SQL, where the + // referencing to the same CTE is not allowed) + // + // 2. A recursive term (the right hand side, and the recursive + // part) + + // Since static term does not have any specific properties, it can + // be compiled as if it was a regular expression. This will + // allow us to infer the schema to be used in the recursive term. + + // ---------- Step 1: Compile the static term ------------------ + let static_plan = + self.set_expr_to_plan(*left_expr, &mut planner_context.clone())?; + + // Since the recursive CTEs include a component that references a + // table with its name, like the example below: + // + // WITH RECURSIVE values(n) AS ( + // SELECT 1 as n -- static term + // UNION ALL + // SELECT n + 1 + // FROM values -- self reference + // WHERE n < 100 + // ) + // + // We need a temporary 'relation' to be referenced and used. PostgreSQL + // calls this a 'working table', but it is entirely an implementation + // detail and a 'real' table with that name might not even exist (as + // in the case of DataFusion). + // + // Since we can't simply register a table during planning stage (it is + // an execution problem), we'll use a relation object that preserves the + // schema of the input perfectly and also knows which recursive CTE it is + // bound to. + + // ---------- Step 2: Create a temporary relation ------------------ + // Step 2.1: Create a table source for the temporary relation + let work_table_source = self.context_provider.create_cte_work_table( + &cte_name, + Arc::new(Schema::from(static_plan.schema().as_ref())), + )?; + + // Step 2.2: Create a temporary relation logical plan that will be used + // as the input to the recursive term + let work_table_plan = LogicalPlanBuilder::scan( + cte_name.to_string(), + work_table_source.clone(), + None, + )? + .build()?; + + let name = cte_name.clone(); + + // Step 2.3: Register the temporary relation in the planning context + // For all the self references in the variadic term, we'll replace it + // with the temporary relation we created above by temporarily registering + // it as a CTE. This temporary relation in the planning context will be + // replaced by the actual CTE plan once we're done with the planning. + planner_context.insert_cte(cte_name.clone(), work_table_plan); + + // ---------- Step 3: Compile the recursive term ------------------ + // this uses the named_relation we inserted above to resolve the + // relation. 
This ensures that the recursive term uses the named relation logical plan + // and thus the 'continuance' physical plan as its input and source + let recursive_plan = + self.set_expr_to_plan(*right_expr, &mut planner_context.clone())?; + + // Check if the recursive term references the CTE itself, + // if not, it is a non-recursive CTE + if !has_work_table_reference(&recursive_plan, &work_table_source) { + // Remove the work table plan from the context + planner_context.remove_cte(&cte_name); + // Compile it as a non-recursive CTE + return self.set_operation_to_plan( + SetOperator::Union, + static_plan, + recursive_plan, + set_quantifier, + ); + } + + // ---------- Step 4: Create the final plan ------------------ + // Step 4.1: Compile the final plan + let distinct = !Self::is_union_all(set_quantifier)?; + LogicalPlanBuilder::from(static_plan) + .to_recursive_query(name, recursive_plan, distinct)? + .build() + } +} + +fn has_work_table_reference( + plan: &LogicalPlan, + work_table_source: &Arc, +) -> bool { + let mut has_reference = false; + plan.apply(&mut |node| { + if let LogicalPlan::TableScan(scan) = node { + if Arc::ptr_eq(&scan.source, work_table_source) { + has_reference = true; + return Ok(TreeNodeRecursion::Stop); + } + } + Ok(TreeNodeRecursion::Continue) + }) + // Closure always return Ok + .unwrap(); + has_reference +} diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index 12d6a4669634..1040cc61c702 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -28,6 +28,7 @@ //! [`SqlToRel`]: planner::SqlToRel //! [`LogicalPlan`]: datafusion_expr::logical_plan::LogicalPlan +mod cte; mod expr; pub mod parser; pub mod planner; diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f94c6ec4e8c9..d2182962b98e 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -213,6 +213,11 @@ impl PlannerContext { pub fn get_cte(&self, cte_name: &str) -> Option<&LogicalPlan> { self.ctes.get(cte_name).map(|cte| cte.as_ref()) } + + /// Remove the plan of CTE / Subquery for the specified name + pub(super) fn remove_cte(&mut self, cte_name: &str) { + self.ctes.remove(cte_name); + } } /// SQL query planner diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index eda8398c432b..ba876d052f5e 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -19,21 +19,15 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use arrow::datatypes::Schema; -use datafusion_common::{ - not_impl_err, plan_err, sql_err, Constraints, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{plan_err, Constraints, Result, ScalarValue}; use datafusion_expr::{ CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder, Operator, }; use sqlparser::ast::{ - Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, SetExpr, SetOperator, - SetQuantifier, Value, + Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, SetExpr, Value, }; -use sqlparser::parser::ParserError::ParserError; - impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Generate a logical plan from an SQL query pub(crate) fn query_to_plan( @@ -54,139 +48,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ) -> Result { let set_expr = query.body; if let Some(with) = query.with { - // Process CTEs from top to bottom - let is_recursive = with.recursive; - - for cte in with.cte_tables { - // A `WITH` block can't use the same name more than once - let cte_name = 
self.normalizer.normalize(cte.alias.name.clone()); - if planner_context.contains_cte(&cte_name) { - return sql_err!(ParserError(format!( - "WITH query name {cte_name:?} specified more than once" - ))); - } - - if is_recursive { - if !self - .context_provider - .options() - .execution - .enable_recursive_ctes - { - return not_impl_err!("Recursive CTEs are not enabled"); - } - - match *cte.query.body { - SetExpr::SetOperation { - op: SetOperator::Union, - left, - right, - set_quantifier, - } => { - let distinct = set_quantifier != SetQuantifier::All; - - // Each recursive CTE consists from two parts in the logical plan: - // 1. A static term (the left hand side on the SQL, where the - // referencing to the same CTE is not allowed) - // - // 2. A recursive term (the right hand side, and the recursive - // part) - - // Since static term does not have any specific properties, it can - // be compiled as if it was a regular expression. This will - // allow us to infer the schema to be used in the recursive term. - - // ---------- Step 1: Compile the static term ------------------ - let static_plan = self - .set_expr_to_plan(*left, &mut planner_context.clone())?; - - // Since the recursive CTEs include a component that references a - // table with its name, like the example below: - // - // WITH RECURSIVE values(n) AS ( - // SELECT 1 as n -- static term - // UNION ALL - // SELECT n + 1 - // FROM values -- self reference - // WHERE n < 100 - // ) - // - // We need a temporary 'relation' to be referenced and used. PostgreSQL - // calls this a 'working table', but it is entirely an implementation - // detail and a 'real' table with that name might not even exist (as - // in the case of DataFusion). - // - // Since we can't simply register a table during planning stage (it is - // an execution problem), we'll use a relation object that preserves the - // schema of the input perfectly and also knows which recursive CTE it is - // bound to. - - // ---------- Step 2: Create a temporary relation ------------------ - // Step 2.1: Create a table source for the temporary relation - let work_table_source = - self.context_provider.create_cte_work_table( - &cte_name, - Arc::new(Schema::from(static_plan.schema().as_ref())), - )?; - - // Step 2.2: Create a temporary relation logical plan that will be used - // as the input to the recursive term - let work_table_plan = LogicalPlanBuilder::scan( - cte_name.to_string(), - work_table_source, - None, - )? - .build()?; - - let name = cte_name.clone(); - - // Step 2.3: Register the temporary relation in the planning context - // For all the self references in the variadic term, we'll replace it - // with the temporary relation we created above by temporarily registering - // it as a CTE. This temporary relation in the planning context will be - // replaced by the actual CTE plan once we're done with the planning. - planner_context.insert_cte(cte_name.clone(), work_table_plan); - - // ---------- Step 3: Compile the recursive term ------------------ - // this uses the named_relation we inserted above to resolve the - // relation. This ensures that the recursive term uses the named relation logical plan - // and thus the 'continuance' physical plan as its input and source - let recursive_plan = self - .set_expr_to_plan(*right, &mut planner_context.clone())?; - - // ---------- Step 4: Create the final plan ------------------ - // Step 4.1: Compile the final plan - let logical_plan = LogicalPlanBuilder::from(static_plan) - .to_recursive_query(name, recursive_plan, distinct)? 
- .build()?; - - let final_plan = - self.apply_table_alias(logical_plan, cte.alias)?; - - // Step 4.2: Remove the temporary relation from the planning context and replace it - // with the final plan. - planner_context.insert_cte(cte_name.clone(), final_plan); - } - _ => { - return Err(DataFusionError::SQL( - ParserError(format!("Unsupported CTE: {cte}")), - None, - )); - } - }; - } else { - // create logical plan & pass backreferencing CTEs - // CTE expr don't need extend outer_query_schema - let logical_plan = - self.query_to_plan(*cte.query, &mut planner_context.clone())?; - - // Each `WITH` block can change the column names in the last - // projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). - let logical_plan = self.apply_table_alias(logical_plan, cte.alias)?; - - planner_context.insert_cte(cte_name, logical_plan); - } - } + self.plan_with_clause(with, planner_context)?; } let plan = self.set_expr_to_plan(*(set_expr.clone()), planner_context)?; let plan = self.order_by(plan, query.order_by, planner_context)?; diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index 2cbb68368f72..cbe41c33c729 100644 --- a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -35,45 +35,58 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { right, set_quantifier, } => { - let all = match set_quantifier { - SetQuantifier::All => true, - SetQuantifier::Distinct | SetQuantifier::None => false, - SetQuantifier::ByName => { - return not_impl_err!("UNION BY NAME not implemented"); - } - SetQuantifier::AllByName => { - return not_impl_err!("UNION ALL BY NAME not implemented") - } - SetQuantifier::DistinctByName => { - return not_impl_err!("UNION DISTINCT BY NAME not implemented") - } - }; - let left_plan = self.set_expr_to_plan(*left, planner_context)?; let right_plan = self.set_expr_to_plan(*right, planner_context)?; - match (op, all) { - (SetOperator::Union, true) => LogicalPlanBuilder::from(left_plan) - .union(right_plan)? - .build(), - (SetOperator::Union, false) => LogicalPlanBuilder::from(left_plan) - .union_distinct(right_plan)? - .build(), - (SetOperator::Intersect, true) => { - LogicalPlanBuilder::intersect(left_plan, right_plan, true) - } - (SetOperator::Intersect, false) => { - LogicalPlanBuilder::intersect(left_plan, right_plan, false) - } - (SetOperator::Except, true) => { - LogicalPlanBuilder::except(left_plan, right_plan, true) - } - (SetOperator::Except, false) => { - LogicalPlanBuilder::except(left_plan, right_plan, false) - } - } + self.set_operation_to_plan(op, left_plan, right_plan, set_quantifier) } SetExpr::Query(q) => self.query_to_plan(*q, planner_context), _ => not_impl_err!("Query {set_expr} not implemented yet"), } } + + pub(super) fn is_union_all(set_quantifier: SetQuantifier) -> Result { + match set_quantifier { + SetQuantifier::All => Ok(true), + SetQuantifier::Distinct | SetQuantifier::None => Ok(false), + SetQuantifier::ByName => { + not_impl_err!("UNION BY NAME not implemented") + } + SetQuantifier::AllByName => { + not_impl_err!("UNION ALL BY NAME not implemented") + } + SetQuantifier::DistinctByName => { + not_impl_err!("UNION DISTINCT BY NAME not implemented") + } + } + } + + pub(super) fn set_operation_to_plan( + &self, + op: SetOperator, + left_plan: LogicalPlan, + right_plan: LogicalPlan, + set_quantifier: SetQuantifier, + ) -> Result { + let all = Self::is_union_all(set_quantifier)?; + match (op, all) { + (SetOperator::Union, true) => LogicalPlanBuilder::from(left_plan) + .union(right_plan)? 
+ .build(), + (SetOperator::Union, false) => LogicalPlanBuilder::from(left_plan) + .union_distinct(right_plan)? + .build(), + (SetOperator::Intersect, true) => { + LogicalPlanBuilder::intersect(left_plan, right_plan, true) + } + (SetOperator::Intersect, false) => { + LogicalPlanBuilder::intersect(left_plan, right_plan, false) + } + (SetOperator::Except, true) => { + LogicalPlanBuilder::except(left_plan, right_plan, true) + } + (SetOperator::Except, false) => { + LogicalPlanBuilder::except(left_plan, right_plan, false) + } + } + } } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 101c31039c7e..a34f8f07fe92 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2994,16 +2994,6 @@ fn join_with_aliases() { quick_test(sql, expected); } -#[test] -fn cte_use_same_name_multiple_times() { - let sql = - "with a as (select * from person), a as (select * from orders) select * from a;"; - let expected = - "SQL error: ParserError(\"WITH query name \\\"a\\\" specified more than once\")"; - let result = logical_plan(sql).err().unwrap(); - assert_eq!(result.strip_backtrace(), expected); -} - #[test] fn negative_interval_plus_interval_in_projection() { let sql = "select -interval '2 days' + interval '5 days';"; diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index e33dfabaf2ca..eec7eb0e3399 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -39,6 +39,37 @@ physical_plan ProjectionExec: expr=[1 as a, 2 as b, 3 as c] --PlaceholderRowExec +# cte_use_same_name_multiple_times +statement error DataFusion error: Error during planning: WITH query name "a" specified more than once +WITH a AS (SELECT 1), a AS (SELECT 2) SELECT * FROM a; + +# Test disabling recursive CTE +statement ok +set datafusion.execution.enable_recursive_ctes = false; + +query error DataFusion error: This feature is not implemented: Recursive CTEs are not enabled +WITH RECURSIVE nodes AS ( + SELECT 1 as id + UNION ALL + SELECT id + 1 as id + FROM nodes + WHERE id < 3 +) SELECT * FROM nodes + +statement ok +set datafusion.execution.enable_recursive_ctes = true; + + +# DISTINCT UNION is not supported +query error DataFusion error: This feature is not implemented: Recursive queries with a distinct 'UNION' \(in which the previous iteration's results will be de\-duplicated\) is not supported +WITH RECURSIVE nodes AS ( + SELECT 1 as id + UNION + SELECT id + 1 as id + FROM nodes + WHERE id < 3 +) SELECT * FROM nodes + # trivial recursive CTE works query I rowsort @@ -744,3 +775,60 @@ WITH RECURSIVE my_cte AS ( UNION ALL SELECT 'abc' FROM my_cte WHERE CAST(a AS text) !='abc' ) SELECT * FROM my_cte; + +# Define a non-recursive CTE in the recursive WITH clause. +# Test issue: https://github.com/apache/arrow-datafusion/issues/9804 +query I +WITH RECURSIVE cte AS ( + SELECT a FROM (VALUES(1)) AS t(a) WHERE a > 2 + UNION ALL + SELECT 2 +) SELECT * FROM cte; +---- +2 + +# Define a non-recursive CTE in the recursive WITH clause. +# UNION ALL +query I rowsort +WITH RECURSIVE cte AS ( + SELECT 1 + UNION ALL + SELECT 2 +) SELECT * FROM cte; +---- +1 +2 + +# Define a non-recursive CTE in the recursive WITH clause. +# DISTINCT UNION +query I +WITH RECURSIVE cte AS ( + SELECT 2 + UNION + SELECT 2 +) SELECT * FROM cte; +---- +2 + +# Define a non-recursive CTE in the recursive WITH clause. +# UNION is not present. 
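+# With no UNION present, the planner compiles this as an ordinary
+# non-recursive CTE.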
+query I +WITH RECURSIVE cte AS ( + SELECT 1 +) SELECT * FROM cte; +---- +1 + +# Define a recursive CTE and a non-recursive CTE at the same time. +query II rowsort +WITH RECURSIVE +non_recursive_cte AS ( + SELECT 1 +), +recursive_cte AS ( + SELECT 1 AS a UNION ALL SELECT a+2 FROM recursive_cte WHERE a < 3 +) +SELECT * FROM non_recursive_cte, recursive_cte; +---- +1 1 +1 3 From c8584557cdfa7c138ab9039ceac31323f48a44d3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 1 Apr 2024 11:36:52 -0400 Subject: [PATCH 02/12] Minor: Add SIGMOD paper reference to architecture guide (#9886) --- datafusion/core/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 5dc3e1ce7d3f..f6e2171d6b5f 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -167,6 +167,11 @@ //! overview of how DataFusion is organized and then link to other //! sections of the docs with more details --> //! +//! You can find a formal description of DataFusion's architecture in our +//! [SIGMOD 2024 Paper]. +//! +//! [SIGMOD 2024 Paper]: https://github.com/apache/arrow-datafusion/files/14789704/DataFusion_Query_Engine___SIGMOD_2024-FINAL.pdf +//! //! ## Overview Presentations //! //! The following presentations offer high level overviews of the From b698e5ffc43ebb0585339ef9899496beccc0a707 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 1 Apr 2024 23:38:56 +0800 Subject: [PATCH 03/12] refactor: add macro for the binary math function in `datafusion-function` (#9889) * refactor: macro for the binary math function in datafusion-function * Update datafusion/functions/src/macros.rs --------- Co-authored-by: Andrew Lamb --- datafusion/functions/src/macros.rs | 107 ++++++++++++++++++- datafusion/functions/src/math/atan2.rs | 140 ------------------------- datafusion/functions/src/math/mod.rs | 3 +- 3 files changed, 107 insertions(+), 143 deletions(-) delete mode 100644 datafusion/functions/src/math/atan2.rs diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 4907d74fe941..c92cb27ef5bb 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -251,7 +251,112 @@ macro_rules! make_math_unary_udf { }; } -#[macro_export] +/// Macro to create a binary math UDF. +/// +/// A binary math function takes two arguments of types Float32 or Float64, +/// applies a binary floating function to the argument, and returns a value of the same type. +/// +/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl` +/// $GNAME: a singleton instance of the UDF +/// $NAME: the name of the function +/// $BINARY_FUNC: the binary function to apply to the argument +/// $MONOTONIC_FUNC: the monotonicity of the function +macro_rules! 
make_math_binary_udf { + ($UDF:ident, $GNAME:ident, $NAME:ident, $BINARY_FUNC:ident, $MONOTONICITY:expr) => { + make_udf_function!($NAME::$UDF, $GNAME, $NAME); + + mod $NAME { + use arrow::array::{ArrayRef, Float32Array, Float64Array}; + use arrow::datatypes::DataType; + use datafusion_common::{exec_err, DataFusionError, Result}; + use datafusion_expr::TypeSignature::*; + use datafusion_expr::{ + ColumnarValue, FuncMonotonicity, ScalarUDFImpl, Signature, Volatility, + }; + use std::any::Any; + use std::sync::Arc; + + #[derive(Debug)] + pub struct $UDF { + signature: Signature, + } + + impl $UDF { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Float32, Float32]), + Exact(vec![Float64, Float64]), + ], + Volatility::Immutable, + ), + } + } + } + + impl ScalarUDFImpl for $UDF { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + stringify!($NAME) + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let arg_type = &arg_types[0]; + + match arg_type { + DataType::Float32 => Ok(DataType::Float32), + // For other types (possible values float64/null/int), use Float64 + _ => Ok(DataType::Float64), + } + } + + fn monotonicity(&self) -> Result> { + Ok($MONOTONICITY) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + + let arr: ArrayRef = match args[0].data_type() { + DataType::Float64 => Arc::new(make_function_inputs2!( + &args[0], + &args[1], + "y", + "x", + Float64Array, + { f64::$BINARY_FUNC } + )), + + DataType::Float32 => Arc::new(make_function_inputs2!( + &args[0], + &args[1], + "y", + "x", + Float32Array, + { f32::$BINARY_FUNC } + )), + other => { + return exec_err!( + "Unsupported data type {other:?} for function {}", + self.name() + ) + } + }; + Ok(ColumnarValue::Array(arr)) + } + } + } + }; +} + macro_rules! make_function_inputs2 { ($ARG1: expr, $ARG2: expr, $NAME1:expr, $NAME2: expr, $ARRAY_TYPE:ident, $FUNC: block) => {{ let arg1 = downcast_arg!($ARG1, $NAME1, $ARRAY_TYPE); diff --git a/datafusion/functions/src/math/atan2.rs b/datafusion/functions/src/math/atan2.rs deleted file mode 100644 index b090c6c454fd..000000000000 --- a/datafusion/functions/src/math/atan2.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Math function: `atan2()`. 
- -use arrow::array::{ArrayRef, Float32Array, Float64Array}; -use arrow::datatypes::DataType; -use datafusion_common::DataFusionError; -use datafusion_common::{exec_err, Result}; -use datafusion_expr::ColumnarValue; -use datafusion_expr::TypeSignature::*; -use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use std::any::Any; -use std::sync::Arc; - -use crate::make_function_inputs2; -use crate::utils::make_scalar_function; - -#[derive(Debug)] -pub(super) struct Atan2 { - signature: Signature, -} - -impl Atan2 { - pub fn new() -> Self { - use DataType::*; - Self { - signature: Signature::one_of( - vec![Exact(vec![Float32, Float32]), Exact(vec![Float64, Float64])], - Volatility::Immutable, - ), - } - } -} - -impl ScalarUDFImpl for Atan2 { - fn as_any(&self) -> &dyn Any { - self - } - fn name(&self) -> &str { - "atan2" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - use self::DataType::*; - match &arg_types[0] { - Float32 => Ok(Float32), - _ => Ok(Float64), - } - } - - fn invoke(&self, args: &[ColumnarValue]) -> Result { - make_scalar_function(atan2, vec![])(args) - } -} - -/// Atan2 SQL function -pub fn atan2(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - DataType::Float64 => Ok(Arc::new(make_function_inputs2!( - &args[0], - &args[1], - "y", - "x", - Float64Array, - { f64::atan2 } - )) as ArrayRef), - - DataType::Float32 => Ok(Arc::new(make_function_inputs2!( - &args[0], - &args[1], - "y", - "x", - Float32Array, - { f32::atan2 } - )) as ArrayRef), - - other => exec_err!("Unsupported data type {other:?} for function atan2"), - } -} - -#[cfg(test)] -mod test { - use super::*; - use datafusion_common::cast::{as_float32_array, as_float64_array}; - - #[test] - fn test_atan2_f64() { - let args: Vec = vec![ - Arc::new(Float64Array::from(vec![2.0, -3.0, 4.0, -5.0])), // y - Arc::new(Float64Array::from(vec![1.0, 2.0, -3.0, -4.0])), // x - ]; - - let result = atan2(&args).expect("failed to initialize function atan2"); - let floats = - as_float64_array(&result).expect("failed to initialize function atan2"); - - assert_eq!(floats.len(), 4); - assert_eq!(floats.value(0), (2.0_f64).atan2(1.0)); - assert_eq!(floats.value(1), (-3.0_f64).atan2(2.0)); - assert_eq!(floats.value(2), (4.0_f64).atan2(-3.0)); - assert_eq!(floats.value(3), (-5.0_f64).atan2(-4.0)); - } - - #[test] - fn test_atan2_f32() { - let args: Vec = vec![ - Arc::new(Float32Array::from(vec![2.0, -3.0, 4.0, -5.0])), // y - Arc::new(Float32Array::from(vec![1.0, 2.0, -3.0, -4.0])), // x - ]; - - let result = atan2(&args).expect("failed to initialize function atan2"); - let floats = - as_float32_array(&result).expect("failed to initialize function atan2"); - - assert_eq!(floats.len(), 4); - assert_eq!(floats.value(0), (2.0_f32).atan2(1.0)); - assert_eq!(floats.value(1), (-3.0_f32).atan2(2.0)); - assert_eq!(floats.value(2), (4.0_f32).atan2(-3.0)); - assert_eq!(floats.value(3), (-5.0_f32).atan2(-4.0)); - } -} diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 2ee1fffa1625..ee53fcf96a8b 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -18,13 +18,11 @@ //! 
"math" DataFusion functions mod abs; -mod atan2; mod nans; // Create UDFs make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(abs::AbsFunc, ABS, abs); -make_udf_function!(atan2::Atan2, ATAN2, atan2); make_math_unary_udf!(Log2Func, LOG2, log2, log2, Some(vec![Some(true)])); make_math_unary_udf!(Log10Func, LOG10, log10, log10, Some(vec![Some(true)])); @@ -39,6 +37,7 @@ make_math_unary_udf!(AtanhFunc, ATANH, atanh, atanh, Some(vec![Some(true)])); make_math_unary_udf!(AsinhFunc, ASINH, asinh, asinh, Some(vec![Some(true)])); make_math_unary_udf!(AcoshFunc, ACOSH, acosh, acosh, Some(vec![Some(true)])); make_math_unary_udf!(AtanFunc, ATAN, atan, atan, Some(vec![Some(true)])); +make_math_binary_udf!(Atan2, ATAN2, atan2, atan2, Some(vec![Some(true)])); // Export the functions out of this package, both as expr_fn as well as a list of functions export_functions!( From d8d521ac8b90002fa0ba1f91456051a9775ae193 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Mon, 1 Apr 2024 11:40:23 -0400 Subject: [PATCH 04/12] Add benchmark for substr_index (#9878) * Fix to_timestamp benchmark * Remove reference to simd and nightly build as simd is no longer an available feature in DataFusion and building with nightly may not be a good recommendation when getting started. * Fixed missing trim() function. * Add benchmark for substr_index * Add missing required-features * Update datafusion/functions/benches/substr_index.rs Co-authored-by: Andrew Lamb --------- Co-authored-by: Andrew Lamb --- datafusion/functions/Cargo.toml | 5 + datafusion/functions/benches/substr_index.rs | 103 +++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 datafusion/functions/benches/substr_index.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 425ac207c33e..ef7d2c9b1892 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -107,3 +107,8 @@ required-features = ["datetime_expressions"] harness = false name = "to_char" required-features = ["datetime_expressions"] + +[[bench]] +harness = false +name = "substr_index" +required-features = ["unicode_expressions"] diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs new file mode 100644 index 000000000000..bb9a5b809eee --- /dev/null +++ b/datafusion/functions/benches/substr_index.rs @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use std::sync::Arc; + +use arrow::array::{ArrayRef, Int64Array, StringArray}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use rand::distributions::{Alphanumeric, Uniform}; +use rand::prelude::Distribution; +use rand::Rng; + +use datafusion_expr::ColumnarValue; +use datafusion_functions::unicode::substr_index; + +struct Filter { + dist: Dist, + test: Test, +} + +impl Distribution for Filter +where + Dist: Distribution, + Test: Fn(&T) -> bool, +{ + fn sample(&self, rng: &mut R) -> T { + loop { + let x = self.dist.sample(rng); + if (self.test)(&x) { + return x; + } + } + } +} + +fn data() -> (StringArray, StringArray, Int64Array) { + let dist = Filter { + dist: Uniform::new(-4, 5), + test: |x: &i64| x != &0, + }; + let mut rng = rand::thread_rng(); + let mut strings: Vec = vec![]; + let mut delimiters: Vec = vec![]; + let mut counts: Vec = vec![]; + + for _ in 0..1000 { + let length = rng.gen_range(20..50); + let text: String = (&mut rng) + .sample_iter(&Alphanumeric) + .take(length) + .map(char::from) + .collect(); + let char = rng.gen_range(0..text.len()); + let delimiter = &text.chars().nth(char).unwrap(); + let count = rng.sample(&dist); + + strings.push(text); + delimiters.push(delimiter.to_string()); + counts.push(count); + } + + ( + StringArray::from(strings), + StringArray::from(delimiters), + Int64Array::from(counts), + ) +} + +fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("substr_index_array_array_1000", |b| { + let (strings, delimiters, counts) = data(); + let strings = ColumnarValue::Array(Arc::new(strings) as ArrayRef); + let delimiters = ColumnarValue::Array(Arc::new(delimiters) as ArrayRef); + let counts = ColumnarValue::Array(Arc::new(counts) as ArrayRef); + + let args = [strings, delimiters, counts]; + b.iter(|| { + black_box( + substr_index() + .invoke(&args) + .expect("substr_index should work on valid values"), + ) + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From b50f3aad043da9de613f422f20f7aa916ce55776 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 1 Apr 2024 16:57:09 -0400 Subject: [PATCH 05/12] Add test for reading back file created with FORMAT options (#9753) --- datafusion/sqllogictest/test_files/copy.slt | 25 +++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index fca892dfcdad..95b6d29db407 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -514,10 +514,31 @@ OPTIONS ( ); -# Format Options Support with format in OPTIONS i.e. COPY { table_name | query } TO 'file_name' OPTIONS (format , ...) +# Format Options Support with format in OPTIONS +# +# i.e. COPY { table_name | query } TO 'file_name' OPTIONS (format , ...) 
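+# (e.g. `OPTIONS (format parquet)`, exercised below with a non-parquet
+# extension 'foo.dat' and with no extension at all)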
+# Ensure that the format is set in the OPTIONS, not extension query I -COPY (select * from (values (1))) to 'test_files/scratch/copy/' +COPY (select * from (values (1))) to 'test_files/scratch/copy/foo.dat' +OPTIONS (format parquet); +---- +1 + +statement ok +CREATE EXTERNAL TABLE foo_dat STORED AS PARQUET LOCATION 'test_files/scratch/copy/foo.dat'; + +query I +select * from foo_dat; +---- +1 + +statement ok +DROP TABLE foo_dat; + + +query I +COPY (select * from (values (1))) to 'test_files/scratch/copy' OPTIONS (format parquet); ---- 1 From 178a26ddbb1c80cc1dde6797cd1f2f74f11cbf8b Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Mon, 1 Apr 2024 14:57:22 -0600 Subject: [PATCH 06/12] port expr2sql (#9902) --- datafusion/sql/src/unparser/expr.rs | 41 ++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index a29b5014b1ce..07b077eb50f1 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -176,7 +176,14 @@ impl Unparser<'_> { }) => { not_impl_err!("Unsupported expression: {expr:?}") } - Expr::Like(Like { + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive: _, + }) + | Expr::Like(Like { negated, expr, pattern, @@ -263,12 +270,18 @@ impl Unparser<'_> { Expr::IsTrue(expr) => { Ok(ast::Expr::IsTrue(Box::new(self.expr_to_sql(expr)?))) } + Expr::IsNotTrue(expr) => { + Ok(ast::Expr::IsNotTrue(Box::new(self.expr_to_sql(expr)?))) + } Expr::IsFalse(expr) => { Ok(ast::Expr::IsFalse(Box::new(self.expr_to_sql(expr)?))) } Expr::IsUnknown(expr) => { Ok(ast::Expr::IsUnknown(Box::new(self.expr_to_sql(expr)?))) } + Expr::IsNotUnknown(expr) => { + Ok(ast::Expr::IsNotUnknown(Box::new(self.expr_to_sql(expr)?))) + } Expr::Not(expr) => { let sql_parser_expr = self.expr_to_sql(expr)?; Ok(AstExpr::UnaryOp { @@ -276,6 +289,13 @@ impl Unparser<'_> { expr: Box::new(sql_parser_expr), }) } + Expr::Negative(expr) => { + let sql_parser_expr = self.expr_to_sql(expr)?; + Ok(AstExpr::UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(sql_parser_expr), + }) + } _ => not_impl_err!("Unsupported expression: {expr:?}"), } } @@ -728,6 +748,16 @@ mod tests { }), r#""a" NOT LIKE 'foo' ESCAPE 'o'"#, ), + ( + Expr::SimilarTo(Like { + negated: false, + expr: Box::new(col("a")), + pattern: Box::new(lit("foo")), + escape_char: Some('o'), + case_insensitive: true, + }), + r#""a" LIKE 'foo' ESCAPE 'o'"#, + ), ( Expr::Literal(ScalarValue::Date64(Some(0))), r#"CAST('1970-01-01 00:00:00' AS DATETIME)"#, @@ -783,6 +813,10 @@ mod tests { (col("a") + col("b")).gt(lit(4)).is_true(), r#"(("a" + "b") > 4) IS TRUE"#, ), + ( + (col("a") + col("b")).gt(lit(4)).is_not_true(), + r#"(("a" + "b") > 4) IS NOT TRUE"#, + ), ( (col("a") + col("b")).gt(lit(4)).is_false(), r#"(("a" + "b") > 4) IS FALSE"#, @@ -791,11 +825,16 @@ mod tests { (col("a") + col("b")).gt(lit(4)).is_unknown(), r#"(("a" + "b") > 4) IS UNKNOWN"#, ), + ( + (col("a") + col("b")).gt(lit(4)).is_not_unknown(), + r#"(("a" + "b") > 4) IS NOT UNKNOWN"#, + ), (not(col("a")), r#"NOT "a""#), ( Expr::between(col("a"), lit(1), lit(7)), r#"("a" BETWEEN 1 AND 7)"#, ), + (Expr::Negative(Box::new(col("a"))), r#"-"a""#), ]; for (expr, expected) in tests { From cc1db8a2043c73bda7adec309b42c08d88defab8 Mon Sep 17 00:00:00 2001 From: Huaijin Date: Tue, 2 Apr 2024 05:07:16 +0800 Subject: [PATCH 07/12] refactor: make dfschema wrap schemaref (#9595) * Start setting up SchemaRef * Start updating DFSchema * More updates to df 
schema * More updates * More updates * Start working on columns * Start cleaning up columns * Remove DFField from dfschema tests * More cleanup * datafusion common is building * More cleanup * Start updating expr * More cleanup * Update build_join_schema * Cleanup expr to_field * Builder updates * Update expr utils * Work on logical plan * Update expr rewriter * Cleanup up logical plan * More cleanup * More cleanup * Cleanup * Fix unnest * make datafusion-expr build * make datafusion-optimizer build * can build some datafusion-sql * clean up * make datafusion-sql build * make core build * make datafusion-substrait build * clean up * clean up * fix plan.rs * fix clean up * fix to_field * fix select * from file * remove DFField in tests * fix some tests * fix unnest and dfschema * fix dfschema test * make datafusion-proto build * fix some optimizer test * fix dfschema merge * fix with_column_renamed * fix compound identifier tests * fix unnest plan * fix except * fix test and conflicts * remove clone in dfschema * clean up dfschema * optimizer dfschema merge * retrigger ci * fmt * apply suggestion * fmt * find field return refer * add some tests * improve build_join_schema * remove some clone * remove ignore * fmt * remove dfschema create method * add column from trait * from Vec to Fields * fmt * Add schema validation check for CREATE EXTERNAL TABLE --------- Co-authored-by: Matthew Turner Co-authored-by: Andrew Lamb --- benchmarks/src/tpch/convert.rs | 5 +- datafusion-examples/examples/expr_api.rs | 28 +- datafusion/common/src/column.rs | 87 +- datafusion/common/src/dfschema.rs | 925 +++++++----------- datafusion/common/src/error.rs | 12 +- .../common/src/functional_dependencies.rs | 28 +- datafusion/common/src/lib.rs | 4 +- datafusion/core/src/dataframe/mod.rs | 44 +- .../core/src/datasource/listing/helpers.rs | 14 +- datafusion/core/src/datasource/view.rs | 7 +- datafusion/core/src/physical_planner.rs | 42 +- datafusion/expr/src/expr_rewriter/mod.rs | 55 +- datafusion/expr/src/expr_rewriter/order_by.rs | 2 +- datafusion/expr/src/expr_schema.rs | 89 +- datafusion/expr/src/logical_plan/builder.rs | 187 ++-- datafusion/expr/src/logical_plan/plan.rs | 103 +- datafusion/expr/src/utils.rs | 73 +- .../src/analyzer/inline_table_scan.rs | 8 +- .../optimizer/src/analyzer/type_coercion.rs | 51 +- .../optimizer/src/common_subexpr_eliminate.rs | 61 +- .../optimizer/src/optimize_projections.rs | 3 +- datafusion/optimizer/src/optimizer.rs | 33 +- .../optimizer/src/propagate_empty_relation.rs | 12 +- datafusion/optimizer/src/push_down_filter.rs | 46 +- .../optimizer/src/push_down_projection.rs | 53 +- .../src/replace_distinct_aggregate.rs | 13 +- .../simplify_expressions/expr_simplifier.rs | 23 +- .../src/single_distinct_to_groupby.rs | 12 +- .../src/unwrap_cast_in_comparison.rs | 31 +- .../proto/src/logical_plan/from_proto.rs | 40 +- datafusion/proto/src/logical_plan/to_proto.rs | 25 +- .../tests/cases/roundtrip_logical_plan.rs | 17 +- datafusion/sql/src/expr/identifier.rs | 44 +- datafusion/sql/src/expr/mod.rs | 24 +- datafusion/sql/src/expr/order_by.rs | 5 +- datafusion/sql/src/relation/join.rs | 10 +- datafusion/sql/src/statement.rs | 28 +- datafusion/sql/src/utils.rs | 9 +- .../engines/datafusion_engine/normalize.rs | 4 +- .../substrait/src/logical_plan/consumer.rs | 21 +- 40 files changed, 1063 insertions(+), 1215 deletions(-) diff --git a/benchmarks/src/tpch/convert.rs b/benchmarks/src/tpch/convert.rs index 4b6627234334..a841fe532294 100644 --- a/benchmarks/src/tpch/convert.rs +++ 
b/benchmarks/src/tpch/convert.rs @@ -86,10 +86,11 @@ impl ConvertOpt { // Select all apart from the padding column let selection = csv .schema() - .fields() .iter() .take(schema.fields.len() - 1) - .map(|d| Expr::Column(d.qualified_column())) + .map(|(qualifier, field)| { + Expr::Column(Column::from((qualifier, field.as_ref()))) + }) .collect(); csv = csv.select(selection)?; diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index 5f9f3106e14d..6e9c42480c32 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -22,7 +22,7 @@ use arrow::array::{BooleanArray, Int32Array}; use arrow::record_batch::RecordBatch; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; -use datafusion::common::{DFField, DFSchema}; +use datafusion::common::DFSchema; use datafusion::error::Result; use datafusion::optimizer::simplify_expressions::ExprSimplifier; use datafusion::physical_expr::{ @@ -272,32 +272,30 @@ fn expression_type_demo() -> Result<()> { // types of the input expressions. You can provide this information using // a schema. In this case we create a schema where the column `c` is of // type Utf8 (a String / VARCHAR) - let schema = DFSchema::new_with_metadata( - vec![DFField::new_unqualified("c", DataType::Utf8, true)], + let schema = DFSchema::from_unqualifed_fields( + vec![Field::new("c", DataType::Utf8, true)].into(), HashMap::new(), - ) - .unwrap(); + )?; assert_eq!("Utf8", format!("{}", expr.get_type(&schema).unwrap())); // Using a schema where the column `foo` is of type Int32 - let schema = DFSchema::new_with_metadata( - vec![DFField::new_unqualified("c", DataType::Int32, true)], + let schema = DFSchema::from_unqualifed_fields( + vec![Field::new("c", DataType::Int32, true)].into(), HashMap::new(), - ) - .unwrap(); + )?; assert_eq!("Int32", format!("{}", expr.get_type(&schema).unwrap())); // Get the type of an expression that adds 2 columns. Adding an Int32 // and Float32 results in Float32 type let expr = col("c1") + col("c2"); - let schema = DFSchema::new_with_metadata( + let schema = DFSchema::from_unqualifed_fields( vec![ - DFField::new_unqualified("c1", DataType::Int32, true), - DFField::new_unqualified("c2", DataType::Float32, true), - ], + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Float32, true), + ] + .into(), HashMap::new(), - ) - .unwrap(); + )?; assert_eq!("Float32", format!("{}", expr.get_type(&schema).unwrap())); Ok(()) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index f0edc7175948..16f9579c668c 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -17,6 +17,8 @@ //! Column +use arrow_schema::Field; + use crate::error::_schema_err; use crate::utils::{parse_identifiers_normalized, quote_identifier}; use crate::{DFSchema, DataFusionError, OwnedTableReference, Result, SchemaError}; @@ -178,11 +180,12 @@ impl Column { } for schema in schemas { - let fields = schema.fields_with_unqualified_name(&self.name); - match fields.len() { + let qualified_fields = + schema.qualified_fields_with_unqualified_name(&self.name); + match qualified_fields.len() { 0 => continue, 1 => { - return Ok(fields[0].qualified_column()); + return Ok(Column::from(qualified_fields[0])); } _ => { // More than 1 fields in this schema have their names set to self.name. @@ -198,14 +201,13 @@ impl Column { // We will use the relation from the first matched field to normalize self. 
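+                    // e.g. in `SELECT id FROM t1 JOIN t2 USING (id)`, the
+                    // unqualified `id` matches both `t1.id` and `t2.id`; both
+                    // belong to the same USING clause, so picking the first
+                    // match is unambiguous.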
// Compare matched fields with one USING JOIN clause at a time + let columns = schema.columns_with_unqualified_name(&self.name); for using_col in using_columns { - let all_matched = fields - .iter() - .all(|f| using_col.contains(&f.qualified_column())); + let all_matched = columns.iter().all(|f| using_col.contains(f)); // All matched fields belong to the same using column set, in orther words // the same join clause. We simply pick the qualifer from the first match. if all_matched { - return Ok(fields[0].qualified_column()); + return Ok(columns[0].clone()); } } } @@ -214,10 +216,7 @@ impl Column { _schema_err!(SchemaError::FieldNotFound { field: Box::new(Column::new(self.relation.clone(), self.name)), - valid_fields: schemas - .iter() - .flat_map(|s| s.fields().iter().map(|f| f.qualified_column())) - .collect(), + valid_fields: schemas.iter().flat_map(|s| s.columns()).collect(), }) } @@ -267,13 +266,13 @@ impl Column { } for schema_level in schemas { - let fields = schema_level + let qualified_fields = schema_level .iter() - .flat_map(|s| s.fields_with_unqualified_name(&self.name)) + .flat_map(|s| s.qualified_fields_with_unqualified_name(&self.name)) .collect::>(); - match fields.len() { + match qualified_fields.len() { 0 => continue, - 1 => return Ok(fields[0].qualified_column()), + 1 => return Ok(Column::from(qualified_fields[0])), _ => { // More than 1 fields in this schema have their names set to self.name. // @@ -288,14 +287,16 @@ impl Column { // We will use the relation from the first matched field to normalize self. // Compare matched fields with one USING JOIN clause at a time + let columns = schema_level + .iter() + .flat_map(|s| s.columns_with_unqualified_name(&self.name)) + .collect::>(); for using_col in using_columns { - let all_matched = fields - .iter() - .all(|f| using_col.contains(&f.qualified_column())); + let all_matched = columns.iter().all(|c| using_col.contains(c)); // All matched fields belong to the same using column set, in orther words // the same join clause. We simply pick the qualifer from the first match. 
if all_matched { - return Ok(fields[0].qualified_column()); + return Ok(columns[0].clone()); } } @@ -312,7 +313,7 @@ impl Column { valid_fields: schemas .iter() .flat_map(|s| s.iter()) - .flat_map(|s| s.fields().iter().map(|f| f.qualified_column())) + .flat_map(|s| s.columns()) .collect(), }) } @@ -338,6 +339,13 @@ impl From for Column { } } +/// Create a column, use qualifier and field name +impl From<(Option<&OwnedTableReference>, &Field)> for Column { + fn from((relation, field): (Option<&OwnedTableReference>, &Field)) -> Self { + Self::new(relation.cloned(), field.name()) + } +} + impl FromStr for Column { type Err = Infallible; @@ -355,36 +363,25 @@ impl fmt::Display for Column { #[cfg(test)] mod tests { use super::*; - use crate::DFField; use arrow::datatypes::DataType; - use std::collections::HashMap; - - fn create_schema(names: &[(Option<&str>, &str)]) -> Result { - let fields = names - .iter() - .map(|(qualifier, name)| { - DFField::new( - qualifier.to_owned().map(|s| s.to_string()), - name, - DataType::Boolean, - true, - ) - }) - .collect::>(); - DFSchema::new_with_metadata(fields, HashMap::new()) + use arrow_schema::{Field, SchemaBuilder}; + + fn create_qualified_schema(qualifier: &str, names: Vec<&str>) -> Result { + let mut schema_builder = SchemaBuilder::new(); + schema_builder.extend( + names + .iter() + .map(|f| Field::new(*f, DataType::Boolean, true)), + ); + let schema = Arc::new(schema_builder.finish()); + DFSchema::try_from_qualified_schema(qualifier, &schema) } #[test] fn test_normalize_with_schemas_and_ambiguity_check() -> Result<()> { - let schema1 = create_schema(&[(Some("t1"), "a"), (Some("t1"), "b")])?; - let schema2 = create_schema(&[(Some("t2"), "c"), (Some("t2"), "d")])?; - let schema3 = create_schema(&[ - (Some("t3"), "a"), - (Some("t3"), "b"), - (Some("t3"), "c"), - (Some("t3"), "d"), - (Some("t3"), "e"), - ])?; + let schema1 = create_qualified_schema("t1", vec!["a", "b"])?; + let schema2 = create_qualified_schema("t2", vec!["c", "d"])?; + let schema3 = create_qualified_schema("t3", vec!["a", "b", "c", "d", "e"])?; // already normalized let col = Column::new(Some("t1"), "a"); diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 90fb0b035d35..f098f98a744c 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -24,16 +24,15 @@ use std::fmt::{Display, Formatter}; use std::hash::Hash; use std::sync::Arc; -use crate::error::{ - unqualified_field_not_found, DataFusionError, Result, SchemaError, _plan_err, - _schema_err, -}; +use crate::error::{DataFusionError, Result, _plan_err, _schema_err}; use crate::{ - field_not_found, Column, FunctionalDependencies, OwnedTableReference, TableReference, + field_not_found, unqualified_field_not_found, Column, FunctionalDependencies, + OwnedTableReference, SchemaError, TableReference, }; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema, SchemaRef}; +use arrow_schema::SchemaBuilder; /// A reference-counted reference to a [DFSchema]. 
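+/// Cloning a `DFSchemaRef` only increments an [`Arc`] reference count; the
+/// underlying schema is shared rather than copied.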
pub type DFSchemaRef = Arc; @@ -95,22 +94,24 @@ pub type DFSchemaRef = Arc; /// Use the `Into` trait to convert `DFSchema` into an Arrow schema: /// /// ```rust -/// use datafusion_common::{DFSchema, DFField}; +/// use datafusion_common::DFSchema; /// use arrow_schema::Schema; +/// use arrow::datatypes::Field; /// use std::collections::HashMap; /// -/// let df_schema = DFSchema::new_with_metadata(vec![ -/// DFField::new_unqualified("c1", arrow::datatypes::DataType::Int32, false), -/// ], HashMap::new()).unwrap(); +/// let df_schema = DFSchema::from_unqualifed_fields(vec![ +/// Field::new("c1", arrow::datatypes::DataType::Int32, false), +/// ].into(),HashMap::new()).unwrap(); /// let schema = Schema::from(df_schema); /// assert_eq!(schema.fields().len(), 1); /// ``` #[derive(Debug, Clone, PartialEq, Eq)] pub struct DFSchema { - /// Fields - fields: Vec, - /// Additional metadata in form of key value pairs - metadata: HashMap, + /// Inner Arrow schema reference. + inner: SchemaRef, + /// Optional qualifiers for each column in this schema. In the same order as + /// the `self.inner.fields()` + field_qualifiers: Vec>, /// Stores functional dependencies in the schema. functional_dependencies: FunctionalDependencies, } @@ -119,66 +120,107 @@ impl DFSchema { /// Creates an empty `DFSchema` pub fn empty() -> Self { Self { - fields: vec![], - metadata: HashMap::new(), + inner: Arc::new(Schema::new([])), + field_qualifiers: vec![], functional_dependencies: FunctionalDependencies::empty(), } } - /// Create a new `DFSchema` + /// Create a `DFSchema` from an Arrow schema where all the fields have a given qualifier pub fn new_with_metadata( - fields: Vec, + qualified_fields: Vec<(Option, Arc)>, metadata: HashMap, ) -> Result { + let (qualifiers, fields): (Vec>, Vec>) = + qualified_fields.into_iter().unzip(); + + let schema = Arc::new(Schema::new_with_metadata(fields, metadata)); + + let dfschema = Self { + inner: schema, + field_qualifiers: qualifiers, + functional_dependencies: FunctionalDependencies::empty(), + }; + dfschema.check_names()?; + Ok(dfschema) + } + + /// Create a new `DFSchema` from a list of Arrow [Field]s + pub fn from_unqualifed_fields( + fields: Fields, + metadata: HashMap, + ) -> Result { + let field_count = fields.len(); + let schema = Arc::new(Schema::new_with_metadata(fields, metadata)); + let dfschema = Self { + inner: schema, + field_qualifiers: vec![None; field_count], + functional_dependencies: FunctionalDependencies::empty(), + }; + dfschema.check_names()?; + Ok(dfschema) + } + + /// Create a `DFSchema` from an Arrow schema and a given qualifier + /// + /// To create a schema from an Arrow schema without a qualifier, use + /// `DFSchema::try_from`. 
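+    ///
+    /// A minimal usage sketch (the field name is illustrative):
+    /// ```rust
+    /// use arrow::datatypes::{DataType, Field, Schema};
+    /// use datafusion_common::DFSchema;
+    ///
+    /// let schema = Schema::new(vec![Field::new("c0", DataType::Boolean, true)]);
+    /// let df_schema = DFSchema::try_from_qualified_schema("t1", &schema).unwrap();
+    /// assert_eq!(df_schema.fields().len(), 1);
+    /// ```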
+ pub fn try_from_qualified_schema<'a>( + qualifier: impl Into>, + schema: &Schema, + ) -> Result { + let qualifier = qualifier.into(); + let owned_qualifier = qualifier.to_owned_reference(); + let schema = DFSchema { + inner: schema.clone().into(), + field_qualifiers: vec![Some(owned_qualifier); schema.fields.len()], + functional_dependencies: FunctionalDependencies::empty(), + }; + schema.check_names()?; + Ok(schema) + } + + /// Create a `DFSchema` from an Arrow schema where all the fields have a given qualifier + pub fn from_field_specific_qualified_schema<'a>( + qualifiers: Vec>>>, + schema: &SchemaRef, + ) -> Result { + let owned_qualifiers = qualifiers + .into_iter() + .map(|qualifier| qualifier.map(|q| q.into().to_owned_reference())) + .collect(); + let dfschema = Self { + inner: schema.clone(), + field_qualifiers: owned_qualifiers, + functional_dependencies: FunctionalDependencies::empty(), + }; + dfschema.check_names()?; + Ok(dfschema) + } + + /// Check if the schema have some fields with the same name + pub fn check_names(&self) -> Result<()> { let mut qualified_names = BTreeSet::new(); let mut unqualified_names = BTreeSet::new(); - for field in &fields { - if let Some(qualifier) = field.qualifier() { + for (field, qualifier) in self.inner.fields().iter().zip(&self.field_qualifiers) { + if let Some(qualifier) = qualifier { qualified_names.insert((qualifier, field.name())); } else if !unqualified_names.insert(field.name()) { return _schema_err!(SchemaError::DuplicateUnqualifiedField { - name: field.name().to_string(), + name: field.name().to_string() }); } } - // Check for mix of qualified and unqualified fields with same unqualified name. - // The BTreeSet storage makes sure that errors are reported in deterministic order. - for (qualifier, name) in &qualified_names { + for (qualifier, name) in qualified_names { if unqualified_names.contains(name) { return _schema_err!(SchemaError::AmbiguousReference { - field: Column { - relation: Some((*qualifier).clone()), - name: name.to_string(), - } + field: Column::new(Some(qualifier.to_owned_reference()), name) }); } } - Ok(Self { - fields, - metadata, - functional_dependencies: FunctionalDependencies::empty(), - }) - } - - /// Create a `DFSchema` from an Arrow schema and a given qualifier - /// - /// To create a schema from an Arrow schema without a qualifier, use - /// `DFSchema::try_from`. - pub fn try_from_qualified_schema<'a>( - qualifier: impl Into>, - schema: &Schema, - ) -> Result { - let qualifier = qualifier.into(); - Self::new_with_metadata( - schema - .fields() - .iter() - .map(|f| DFField::from_qualified(qualifier.clone(), f.clone())) - .collect(), - schema.metadata().clone(), - ) + Ok(()) } /// Assigns functional dependencies. @@ -186,7 +228,7 @@ impl DFSchema { mut self, functional_dependencies: FunctionalDependencies, ) -> Result { - if functional_dependencies.is_valid(self.fields.len()) { + if functional_dependencies.is_valid(self.inner.fields.len()) { self.functional_dependencies = functional_dependencies; Ok(self) } else { @@ -200,50 +242,82 @@ impl DFSchema { /// Create a new schema that contains the fields from this schema followed by the fields /// from the supplied schema. An error will be returned if there are duplicate field names. 
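+    ///
+    /// A small sketch joining two qualified schemas (qualifiers are
+    /// illustrative):
+    /// ```rust
+    /// use arrow::datatypes::{DataType, Field, Schema};
+    /// use datafusion_common::DFSchema;
+    ///
+    /// let t1 = DFSchema::try_from_qualified_schema(
+    ///     "t1",
+    ///     &Schema::new(vec![Field::new("a", DataType::Int32, false)]),
+    /// )
+    /// .unwrap();
+    /// let t2 = DFSchema::try_from_qualified_schema(
+    ///     "t2",
+    ///     &Schema::new(vec![Field::new("b", DataType::Int32, false)]),
+    /// )
+    /// .unwrap();
+    /// let joined = t1.join(&t2).unwrap();
+    /// assert_eq!(joined.fields().len(), 2);
+    /// ```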
pub fn join(&self, schema: &DFSchema) -> Result { - let mut fields = self.fields.clone(); - let mut metadata = self.metadata.clone(); - fields.extend_from_slice(schema.fields().as_slice()); - metadata.extend(schema.metadata.clone()); - Self::new_with_metadata(fields, metadata) + let mut schema_builder = SchemaBuilder::new(); + schema_builder.extend(self.inner.fields().iter().cloned()); + schema_builder.extend(schema.fields().iter().cloned()); + let new_schema = schema_builder.finish(); + + let mut new_metadata = self.inner.metadata.clone(); + new_metadata.extend(schema.inner.metadata.clone()); + let new_schema_with_metadata = new_schema.with_metadata(new_metadata); + + let mut new_qualifiers = self.field_qualifiers.clone(); + new_qualifiers.extend_from_slice(schema.field_qualifiers.as_slice()); + + let new_self = Self { + inner: Arc::new(new_schema_with_metadata), + field_qualifiers: new_qualifiers, + functional_dependencies: FunctionalDependencies::empty(), + }; + new_self.check_names()?; + Ok(new_self) } /// Modify this schema by appending the fields from the supplied schema, ignoring any /// duplicate fields. pub fn merge(&mut self, other_schema: &DFSchema) { - if other_schema.fields.is_empty() { + if other_schema.inner.fields.is_empty() { return; } - let self_fields: HashSet<&DFField> = self.fields.iter().collect(); - let self_unqualified_names: HashSet<&str> = - self.fields.iter().map(|x| x.name().as_str()).collect(); - - let mut fields_to_add = vec![]; + let self_fields: HashSet<(Option<&OwnedTableReference>, &FieldRef)> = + self.iter().collect(); + let self_unqualified_names: HashSet<&str> = self + .inner + .fields + .iter() + .map(|field| field.name().as_str()) + .collect(); - for field in other_schema.fields() { + let mut schema_builder = SchemaBuilder::from(self.inner.fields.clone()); + let mut qualifiers = Vec::new(); + for (qualifier, field) in other_schema.iter() { // skip duplicate columns - let duplicated_field = match field.qualifier() { - Some(_) => self_fields.contains(field), + let duplicated_field = match qualifier { + Some(q) => self_fields.contains(&(Some(q), field)), // for unqualified columns, check as unqualified name None => self_unqualified_names.contains(field.name().as_str()), }; if !duplicated_field { - fields_to_add.push(field.clone()); + // self.inner.fields.push(field.clone()); + schema_builder.push(field.clone()); + qualifiers.push(qualifier.cloned()); } } - self.fields.extend(fields_to_add); - self.metadata.extend(other_schema.metadata.clone()) + let mut metadata = self.inner.metadata.clone(); + metadata.extend(other_schema.inner.metadata.clone()); + + let finished = schema_builder.finish(); + let finished_with_metadata = finished.with_metadata(metadata); + self.inner = finished_with_metadata.into(); + self.field_qualifiers.extend(qualifiers); } /// Get a list of fields - pub fn fields(&self) -> &Vec { - &self.fields + pub fn fields(&self) -> &Fields { + &self.inner.fields } /// Returns an immutable reference of a specific `Field` instance selected using an /// offset within the internal `fields` vector - pub fn field(&self, i: usize) -> &DFField { - &self.fields[i] + pub fn field(&self, i: usize) -> &Field { + &self.inner.fields[i] + } + + /// Returns an immutable reference of a specific `Field` instance selected using an + /// offset within the internal `fields` vector and its qualifier + pub fn qualified_field(&self, i: usize) -> (Option<&OwnedTableReference>, &Field) { + (self.field_qualifiers[i].as_ref(), self.field(i)) } pub fn 
index_of_column_by_name( @@ -252,21 +326,18 @@ impl DFSchema { name: &str, ) -> Result> { let mut matches = self - .fields .iter() .enumerate() - .filter(|(_, field)| match (qualifier, &field.qualifier) { + .filter(|(_, (q, f))| match (qualifier, q) { // field to lookup is qualified. // current field is qualified and not shared between relations, compare both // qualifier and name. - (Some(q), Some(field_q)) => { - q.resolved_eq(field_q) && field.name() == name - } + (Some(q), Some(field_q)) => q.resolved_eq(field_q) && f.name() == name, // field to lookup is qualified but current field is unqualified. (Some(qq), None) => { // the original field may now be aliased with a name that matches the // original qualified name - let column = Column::from_qualified_name(field.name()); + let column = Column::from_qualified_name(f.name()); match column { Column { relation: Some(r), @@ -276,7 +347,7 @@ impl DFSchema { } } // field to lookup is unqualified, no need to compare qualifier - (None, Some(_)) | (None, None) => field.name() == name, + (None, Some(_)) | (None, None) => f.name() == name, }) .map(|(idx, _)| idx); Ok(matches.next()) @@ -299,7 +370,7 @@ impl DFSchema { &self, qualifier: Option<&TableReference>, name: &str, - ) -> Result<&DFField> { + ) -> Result<&Field> { if let Some(qualifier) = qualifier { self.field_with_qualified_name(qualifier, name) } else { @@ -307,11 +378,29 @@ impl DFSchema { } } + /// Find the qualified field with the given name + pub fn qualified_field_with_name( + &self, + qualifier: Option<&TableReference>, + name: &str, + ) -> Result<(Option<&OwnedTableReference>, &Field)> { + if let Some(qualifier) = qualifier { + let idx = self + .index_of_column_by_name(Some(qualifier), name)? + .ok_or_else(|| { + field_not_found(Some(qualifier.to_string()), name, self) + })?; + Ok((self.field_qualifiers[idx].as_ref(), self.field(idx))) + } else { + self.qualified_field_with_unqualified_name(name) + } + } + /// Find all fields having the given qualifier - pub fn fields_with_qualified(&self, qualifier: &TableReference) -> Vec<&DFField> { - self.fields - .iter() - .filter(|field| field.qualifier().map(|q| q.eq(qualifier)).unwrap_or(false)) + pub fn fields_with_qualified(&self, qualifier: &TableReference) -> Vec<&Field> { + self.iter() + .filter(|(q, _)| q.map(|q| q.eq(qualifier)).unwrap_or(false)) + .map(|(_, f)| f.as_ref()) .collect() } @@ -320,31 +409,90 @@ impl DFSchema { &self, qualifier: &TableReference, ) -> Vec { - self.fields - .iter() + self.iter() .enumerate() - .filter_map(|(idx, field)| { - field - .qualifier() - .and_then(|q| q.eq(qualifier).then_some(idx)) - }) + .filter_map(|(idx, (q, _))| q.and_then(|q| q.eq(qualifier).then_some(idx))) .collect() } - /// Find all fields match the given name - pub fn fields_with_unqualified_name(&self, name: &str) -> Vec<&DFField> { - self.fields + /// Find all fields that match the given name + pub fn fields_with_unqualified_name(&self, name: &str) -> Vec<&Field> { + self.fields() .iter() .filter(|field| field.name() == name) + .map(|f| f.as_ref()) .collect() } + /// Find all fields that match the given name and return them with their qualifier + pub fn qualified_fields_with_unqualified_name( + &self, + name: &str, + ) -> Vec<(Option<&OwnedTableReference>, &Field)> { + self.iter() + .filter(|(_, field)| field.name() == name) + .map(|(qualifier, field)| (qualifier, field.as_ref())) + .collect() + } + + /// Find all fields that match the given name and convert to column + pub fn columns_with_unqualified_name(&self, name: &str) -> Vec { 
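+        // e.g. with fields `t1.id` and `id`, looking up "id" returns both
+        // entries as `Column`s, each retaining its own qualifier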
+ self.iter() + .filter(|(_, field)| field.name() == name) + .map(|(qualifier, field)| Column::new(qualifier.cloned(), field.name())) + .collect() + } + + /// Return all `Column`s for the schema + pub fn columns(&self) -> Vec { + self.iter() + .map(|(qualifier, field)| { + Column::new(qualifier.cloned(), field.name().clone()) + }) + .collect() + } + + /// Find the qualified field with the given unqualified name + pub fn qualified_field_with_unqualified_name( + &self, + name: &str, + ) -> Result<(Option<&OwnedTableReference>, &Field)> { + let matches = self.qualified_fields_with_unqualified_name(name); + match matches.len() { + 0 => Err(unqualified_field_not_found(name, self)), + 1 => Ok((matches[0].0, &matches[0].1)), + _ => { + // When `matches` size > 1, it doesn't necessarily mean an `ambiguous name` problem. + // Because name may generate from Alias/... . It means that it don't own qualifier. + // For example: + // Join on id = b.id + // Project a.id as id TableScan b id + // In this case, there isn't `ambiguous name` problem. When `matches` just contains + // one field without qualifier, we should return it. + let fields_without_qualifier = matches + .iter() + .filter(|(q, _)| q.is_none()) + .collect::>(); + if fields_without_qualifier.len() == 1 { + Ok((fields_without_qualifier[0].0, fields_without_qualifier[0].1)) + } else { + _schema_err!(SchemaError::AmbiguousReference { + field: Column { + relation: None, + name: name.to_string(), + }, + }) + } + } + } + } + /// Find the field with the given name - pub fn field_with_unqualified_name(&self, name: &str) -> Result<&DFField> { - let matches = self.fields_with_unqualified_name(name); + pub fn field_with_unqualified_name(&self, name: &str) -> Result<&Field> { + let matches = self.qualified_fields_with_unqualified_name(name); match matches.len() { 0 => Err(unqualified_field_not_found(name, self)), - 1 => Ok(matches[0]), + 1 => Ok(matches[0].1), _ => { // When `matches` size > 1, it doesn't necessarily mean an `ambiguous name` problem. // Because name may generate from Alias/... . It means that it don't own qualifier. @@ -355,10 +503,10 @@ impl DFSchema { // one field without qualifier, we should return it. let fields_without_qualifier = matches .iter() - .filter(|f| f.qualifier.is_none()) + .filter(|(q, _)| q.is_none()) .collect::>(); if fields_without_qualifier.len() == 1 { - Ok(fields_without_qualifier[0]) + Ok(fields_without_qualifier[0].1) } else { _schema_err!(SchemaError::AmbiguousReference { field: Column { @@ -376,7 +524,7 @@ impl DFSchema { &self, qualifier: &TableReference, name: &str, - ) -> Result<&DFField> { + ) -> Result<&Field> { let idx = self .index_of_column_by_name(Some(qualifier), name)? 
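             // `index_of_column_by_name` returns `Ok(None)` when no field
             // matches; `ok_or_else` below maps that to a `field_not_found`
             // planning error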
.ok_or_else(|| field_not_found(Some(qualifier.to_string()), name, self))?; @@ -385,13 +533,21 @@ impl DFSchema { } /// Find the field with the given qualified column - pub fn field_from_column(&self, column: &Column) -> Result<&DFField> { + pub fn field_from_column(&self, column: &Column) -> Result<&Field> { match &column.relation { Some(r) => self.field_with_qualified_name(r, &column.name), None => self.field_with_unqualified_name(&column.name), } } + /// Find the field with the given qualified column + pub fn qualified_field_from_column( + &self, + column: &Column, + ) -> Result<(Option<&OwnedTableReference>, &Field)> { + self.qualified_field_with_name(column.relation.as_ref(), &column.name) + } + /// Find if the field exists with the given name pub fn has_column_with_unqualified_name(&self, name: &str) -> bool { self.fields().iter().any(|field| field.name() == name) @@ -403,10 +559,8 @@ impl DFSchema { qualifier: &TableReference, name: &str, ) -> bool { - self.fields().iter().any(|field| { - field.qualifier().map(|q| q.eq(qualifier)).unwrap_or(false) - && field.name() == name - }) + self.iter() + .any(|(q, f)| q.map(|q| q.eq(qualifier)).unwrap_or(false) && f.name() == name) } /// Find if the field exists with the given qualified column @@ -419,7 +573,8 @@ impl DFSchema { /// Check to see if unqualified field names matches field names in Arrow schema pub fn matches_arrow_schema(&self, arrow_schema: &Schema) -> bool { - self.fields + self.inner + .fields .iter() .zip(arrow_schema.fields().iter()) .all(|(dffield, arrowfield)| dffield.name() == arrowfield.name()) @@ -457,10 +612,10 @@ impl DFSchema { if self.fields().len() != other.fields().len() { return false; } - let self_fields = self.fields().iter(); - let other_fields = other.fields().iter(); - self_fields.zip(other_fields).all(|(f1, f2)| { - f1.qualifier() == f2.qualifier() + let self_fields = self.iter(); + let other_fields = other.iter(); + self_fields.zip(other_fields).all(|((q1, f1), (q2, f2))| { + q1 == q2 && f1.name() == f2.name() && Self::datatype_is_logically_equal(f1.data_type(), f2.data_type()) }) @@ -479,10 +634,10 @@ impl DFSchema { if self.fields().len() != other.fields().len() { return false; } - let self_fields = self.fields().iter(); - let other_fields = other.fields().iter(); - self_fields.zip(other_fields).all(|(f1, f2)| { - f1.qualifier() == f2.qualifier() + let self_fields = self.iter(); + let other_fields = other.iter(); + self_fields.zip(other_fields).all(|((q1, f1), (q2, f2))| { + q1 == q2 && f1.name() == f2.name() && Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type()) }) @@ -588,12 +743,9 @@ impl DFSchema { /// Strip all field qualifier in schema pub fn strip_qualifiers(self) -> Self { DFSchema { - fields: self - .fields - .into_iter() - .map(|f| f.strip_qualifier()) - .collect(), - ..self + field_qualifiers: vec![None; self.inner.fields.len()], + inner: self.inner, + functional_dependencies: self.functional_dependencies, } } @@ -601,47 +753,53 @@ impl DFSchema { pub fn replace_qualifier(self, qualifier: impl Into) -> Self { let qualifier = qualifier.into(); DFSchema { - fields: self - .fields - .into_iter() - .map(|f| DFField::from_qualified(qualifier.clone(), f.field)) - .collect(), - ..self + field_qualifiers: vec![Some(qualifier); self.inner.fields.len()], + inner: self.inner, + functional_dependencies: self.functional_dependencies, } } /// Get list of fully-qualified field names in this schema pub fn field_names(&self) -> Vec { - self.fields - .iter() - .map(|f| f.qualified_name()) + 
self.iter()
+            .map(|(qualifier, field)| qualified_name(qualifier, field.name()))
             .collect::<Vec<_>>()
     }
 
     /// Get metadata of this schema
     pub fn metadata(&self) -> &HashMap<String, String> {
-        &self.metadata
+        &self.inner.metadata
     }
 
     /// Get functional dependencies
     pub fn functional_dependencies(&self) -> &FunctionalDependencies {
         &self.functional_dependencies
     }
+
+    /// Iterate over the qualifiers and fields in the DFSchema
+    pub fn iter(
+        &self,
+    ) -> impl Iterator<Item = (Option<&OwnedTableReference>, &FieldRef)> {
+        self.field_qualifiers
+            .iter()
+            .zip(self.inner.fields().iter())
+            .map(|(qualifier, field)| (qualifier.as_ref(), field))
+    }
 }
 
 impl From<DFSchema> for Schema {
     /// Convert DFSchema into a Schema
     fn from(df_schema: DFSchema) -> Self {
-        let fields: Fields = df_schema.fields.into_iter().map(|f| f.field).collect();
-        Schema::new_with_metadata(fields, df_schema.metadata)
+        let fields: Fields = df_schema.inner.fields.clone();
+        Schema::new_with_metadata(fields, df_schema.inner.metadata.clone())
     }
 }
 
 impl From<&DFSchema> for Schema {
     /// Convert DFSchema reference into a Schema
     fn from(df_schema: &DFSchema) -> Self {
-        let fields: Fields = df_schema.fields.iter().map(|f| f.field.clone()).collect();
-        Schema::new_with_metadata(fields, df_schema.metadata.clone())
+        let fields: Fields = df_schema.inner.fields.clone();
+        Schema::new_with_metadata(fields, df_schema.inner.metadata.clone())
     }
 }
 
@@ -649,14 +807,13 @@ impl From<&DFSchema> for Schema {
 impl TryFrom<Schema> for DFSchema {
     type Error = DataFusionError;
     fn try_from(schema: Schema) -> Result<Self> {
-        Self::new_with_metadata(
-            schema
-                .fields()
-                .iter()
-                .map(|f| DFField::from(f.clone()))
-                .collect(),
-            schema.metadata().clone(),
-        )
+        let field_count = schema.fields.len();
+        let dfschema = Self {
+            inner: schema.into(),
+            field_qualifiers: vec![None; field_count],
+            functional_dependencies: FunctionalDependencies::empty(),
+        };
+        Ok(dfschema)
     }
 }
 
@@ -669,8 +826,8 @@ impl From<DFSchema> for SchemaRef {
 // Hashing refers to a subset of fields considered in PartialEq.
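The `iter()` accessor added above is the primitive that `field_names`, `Display`, and the `From` conversions are built on. A minimal usage sketch, assuming the `qualified_name` helper introduced near the bottom of this file and re-exported from `datafusion_common`:

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::{qualified_name, DFSchema, Result};

fn main() -> Result<()> {
    let df_schema = DFSchema::try_from_qualified_schema(
        "t1",
        &Schema::new(vec![Field::new("c0", DataType::Int32, false)]),
    )?;
    // `iter()` zips the stored qualifiers with the inner Arrow fields.
    for (qualifier, field) in df_schema.iter() {
        assert_eq!(qualified_name(qualifier, field.name()), "t1.c0");
    }
    Ok(())
}
```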
impl Hash for DFSchema { fn hash(&self, state: &mut H) { - self.fields.hash(state); - self.metadata.len().hash(state); // HashMap is not hashable + self.inner.fields.hash(state); + self.inner.metadata.len().hash(state); // HashMap is not hashable } } @@ -705,9 +862,19 @@ impl ToDFSchema for SchemaRef { } } -impl ToDFSchema for Vec { +impl ToDFSchema for Vec { fn to_dfschema(self) -> Result { - DFSchema::new_with_metadata(self, HashMap::new()) + let field_count = self.len(); + let schema = Schema { + fields: self.into(), + metadata: HashMap::new(), + }; + let dfschema = DFSchema { + inner: schema.into(), + field_qualifiers: vec![None; field_count], + functional_dependencies: FunctionalDependencies::empty(), + }; + Ok(dfschema) } } @@ -716,12 +883,11 @@ impl Display for DFSchema { write!( f, "fields:[{}], metadata:{:?}", - self.fields - .iter() - .map(|field| field.qualified_name()) + self.iter() + .map(|(q, f)| qualified_name(q, f.name())) .collect::>() .join(", "), - self.metadata + self.inner.metadata ) } } @@ -783,138 +949,6 @@ impl ExprSchema for DFSchema { } } -/// DFField wraps an Arrow field and adds an optional qualifier -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DFField { - /// Optional qualifier (usually a table or relation name) - qualifier: Option, - /// Arrow field definition - field: FieldRef, -} - -impl DFField { - /// Creates a new `DFField` - pub fn new>( - qualifier: Option, - name: &str, - data_type: DataType, - nullable: bool, - ) -> Self { - DFField { - qualifier: qualifier.map(|s| s.into()), - field: Arc::new(Field::new(name, data_type, nullable)), - } - } - - /// Convenience method for creating new `DFField` without a qualifier - pub fn new_unqualified(name: &str, data_type: DataType, nullable: bool) -> Self { - DFField { - qualifier: None, - field: Arc::new(Field::new(name, data_type, nullable)), - } - } - - /// Create a qualified field from an existing Arrow field - pub fn from_qualified<'a>( - qualifier: impl Into>, - field: impl Into, - ) -> Self { - Self { - qualifier: Some(qualifier.into().to_owned_reference()), - field: field.into(), - } - } - - /// Returns an immutable reference to the `DFField`'s unqualified name - pub fn name(&self) -> &String { - self.field.name() - } - - /// Returns an immutable reference to the `DFField`'s data-type - pub fn data_type(&self) -> &DataType { - self.field.data_type() - } - - /// Indicates whether this `DFField` supports null values - pub fn is_nullable(&self) -> bool { - self.field.is_nullable() - } - - pub fn metadata(&self) -> &HashMap { - self.field.metadata() - } - - /// Returns a string to the `DFField`'s qualified name - pub fn qualified_name(&self) -> String { - if let Some(qualifier) = &self.qualifier { - format!("{}.{}", qualifier, self.field.name()) - } else { - self.field.name().to_owned() - } - } - - /// Builds a qualified column based on self - pub fn qualified_column(&self) -> Column { - Column { - relation: self.qualifier.clone(), - name: self.field.name().to_string(), - } - } - - /// Builds an unqualified column based on self - pub fn unqualified_column(&self) -> Column { - Column { - relation: None, - name: self.field.name().to_string(), - } - } - - /// Get the optional qualifier - pub fn qualifier(&self) -> Option<&OwnedTableReference> { - self.qualifier.as_ref() - } - - /// Get the arrow field - pub fn field(&self) -> &FieldRef { - &self.field - } - - /// Return field with qualifier stripped - pub fn strip_qualifier(mut self) -> Self { - self.qualifier = None; - self - } - - /// Return field 
with nullable specified - pub fn with_nullable(mut self, nullable: bool) -> Self { - let f = self.field().as_ref().clone().with_nullable(nullable); - self.field = f.into(); - self - } - - /// Return field with new metadata - pub fn with_metadata(mut self, metadata: HashMap) -> Self { - let f = self.field().as_ref().clone().with_metadata(metadata); - self.field = f.into(); - self - } -} - -impl From for DFField { - fn from(value: FieldRef) -> Self { - Self { - qualifier: None, - field: value, - } - } -} - -impl From for DFField { - fn from(value: Field) -> Self { - Self::from(Arc::new(value)) - } -} - /// DataFusion-specific extensions to [`Schema`]. pub trait SchemaExt { /// This is a specialized version of Eq that ignores differences @@ -967,6 +1001,13 @@ impl SchemaExt for Schema { } } +pub fn qualified_name(qualifier: Option<&TableReference>, name: &str) -> String { + match qualifier { + Some(q) => format!("{}.{}", q, name), + None => name.to_string(), + } +} + #[cfg(test)] mod tests { use crate::assert_contains; @@ -1007,22 +1048,6 @@ mod tests { Ok(()) } - #[test] - fn from_unqualified_field() { - let field = Field::new("c0", DataType::Boolean, true); - let field = DFField::from(field); - assert_eq!("c0", field.name()); - assert_eq!("c0", field.qualified_name()); - } - - #[test] - fn from_qualified_field() { - let field = Field::new("c0", DataType::Boolean, true); - let field = DFField::from_qualified("t1", field); - assert_eq!("c0", field.name()); - assert_eq!("t1.c0", field.qualified_name()); - } - #[test] fn from_unqualified_schema() -> Result<()> { let schema = DFSchema::try_from(test_schema_1())?; @@ -1037,6 +1062,35 @@ mod tests { Ok(()) } + #[test] + fn test_from_field_specific_qualified_schema() -> Result<()> { + let schema = DFSchema::from_field_specific_qualified_schema( + vec![Some("t1"), None], + &Arc::new(Schema::new(vec![ + Field::new("c0", DataType::Boolean, true), + Field::new("c1", DataType::Boolean, true), + ])), + )?; + assert_eq!("fields:[t1.c0, c1], metadata:{}", schema.to_string()); + Ok(()) + } + + #[test] + fn test_from_qualified_fields() -> Result<()> { + let schema = DFSchema::new_with_metadata( + vec![ + ( + Some("t0".into()), + Arc::new(Field::new("c0", DataType::Boolean, true)), + ), + (None, Arc::new(Field::new("c1", DataType::Boolean, true))), + ], + HashMap::new(), + )?; + assert_eq!("fields:[t0.c0, c1], metadata:{}", schema.to_string()); + Ok(()) + } + #[test] fn from_qualified_schema_into_arrow_schema() -> Result<()> { let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; @@ -1168,252 +1222,6 @@ mod tests { assert_eq!(err.strip_backtrace(), "Schema error: No field named c0."); } - #[test] - fn equivalent_names_and_types() { - let arrow_field1 = Field::new("f1", DataType::Int16, true); - let arrow_field1_meta = arrow_field1.clone().with_metadata(test_metadata_n(2)); - - let field1_i16_t = DFField::from(arrow_field1); - let field1_i16_t_meta = DFField::from(arrow_field1_meta); - let field1_i16_t_qualified = - DFField::from_qualified("foo", field1_i16_t.field().clone()); - let field1_i16_f = DFField::from(Field::new("f1", DataType::Int16, false)); - let field1_i32_t = DFField::from(Field::new("f1", DataType::Int32, true)); - let field2_i16_t = DFField::from(Field::new("f2", DataType::Int16, true)); - let field3_i16_t = DFField::from(Field::new("f3", DataType::Int16, true)); - - let dict = - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); - let field_dict_t = DFField::from(Field::new("f_dict", dict.clone(), 
true)); - let field_dict_f = DFField::from(Field::new("f_dict", dict, false)); - - let list_t = DFField::from(Field::new_list( - "f_list", - field1_i16_t.field().clone(), - true, - )); - let list_f = DFField::from(Field::new_list( - "f_list", - field1_i16_f.field().clone(), - false, - )); - - let list_f_name = DFField::from(Field::new_list( - "f_list", - field2_i16_t.field().clone(), - false, - )); - - let struct_t = DFField::from(Field::new_struct( - "f_struct", - vec![field1_i16_t.field().clone()], - true, - )); - let struct_f = DFField::from(Field::new_struct( - "f_struct", - vec![field1_i16_f.field().clone()], - false, - )); - - let struct_f_meta = DFField::from(Field::new_struct( - "f_struct", - vec![field1_i16_t_meta.field().clone()], - false, - )); - - let struct_f_type = DFField::from(Field::new_struct( - "f_struct", - vec![field1_i32_t.field().clone()], - false, - )); - - // same - TestCase { - fields1: vec![&field1_i16_t], - fields2: vec![&field1_i16_t], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // same but metadata is different, should still be true - TestCase { - fields1: vec![&field1_i16_t_meta], - fields2: vec![&field1_i16_t], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // different name - TestCase { - fields1: vec![&field1_i16_t], - fields2: vec![&field2_i16_t], - expected_dfschema: false, - expected_arrow: false, - } - .run(); - - // different type - TestCase { - fields1: vec![&field1_i16_t], - fields2: vec![&field1_i32_t], - expected_dfschema: false, - expected_arrow: false, - } - .run(); - - // different nullability - TestCase { - fields1: vec![&field1_i16_t], - fields2: vec![&field1_i16_f], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // different qualifier - TestCase { - fields1: vec![&field1_i16_t], - fields2: vec![&field1_i16_t_qualified], - expected_dfschema: false, - expected_arrow: true, - } - .run(); - - // different name after first - TestCase { - fields1: vec![&field2_i16_t, &field1_i16_t], - fields2: vec![&field2_i16_t, &field3_i16_t], - expected_dfschema: false, - expected_arrow: false, - } - .run(); - - // different number - TestCase { - fields1: vec![&field1_i16_t, &field2_i16_t], - fields2: vec![&field1_i16_t], - expected_dfschema: false, - expected_arrow: false, - } - .run(); - - // dictionary - TestCase { - fields1: vec![&field_dict_t], - fields2: vec![&field_dict_t], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // dictionary (different nullable) - TestCase { - fields1: vec![&field_dict_t], - fields2: vec![&field_dict_f], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // dictionary (wrong type) - TestCase { - fields1: vec![&field_dict_t], - fields2: vec![&field1_i16_t], - expected_dfschema: false, - expected_arrow: false, - } - .run(); - - // list (different embedded nullability) - TestCase { - fields1: vec![&list_t], - fields2: vec![&list_f], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // list (different sub field names) - TestCase { - fields1: vec![&list_t], - fields2: vec![&list_f_name], - expected_dfschema: false, - expected_arrow: false, - } - .run(); - - // struct - TestCase { - fields1: vec![&struct_t], - fields2: vec![&struct_f], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // struct (different embedded meta) - TestCase { - fields1: vec![&struct_t], - fields2: vec![&struct_f_meta], - expected_dfschema: true, - expected_arrow: true, - } - .run(); - - // struct (different field 
type) - TestCase { - fields1: vec![&struct_t], - fields2: vec![&struct_f_type], - expected_dfschema: false, - expected_arrow: false, - } - .run(); - - #[derive(Debug)] - struct TestCase<'a> { - fields1: Vec<&'a DFField>, - fields2: Vec<&'a DFField>, - expected_dfschema: bool, - expected_arrow: bool, - } - - impl<'a> TestCase<'a> { - fn run(self) { - println!("Running {self:#?}"); - let schema1 = to_df_schema(self.fields1); - let schema2 = to_df_schema(self.fields2); - assert_eq!( - schema1.equivalent_names_and_types(&schema2), - self.expected_dfschema, - "Comparison did not match expected: {}\n\n\ - schema1:\n\n{:#?}\n\nschema2:\n\n{:#?}", - self.expected_dfschema, - schema1, - schema2 - ); - - let arrow_schema1 = Schema::from(schema1); - let arrow_schema2 = Schema::from(schema2); - assert_eq!( - arrow_schema1.equivalent_names_and_types(&arrow_schema2), - self.expected_arrow, - "Comparison did not match expected: {}\n\n\ - arrow schema1:\n\n{:#?}\n\n arrow schema2:\n\n{:#?}", - self.expected_arrow, - arrow_schema1, - arrow_schema2 - ); - } - } - - fn to_df_schema(fields: Vec<&DFField>) -> DFSchema { - let fields = fields.into_iter().cloned().collect(); - DFSchema::new_with_metadata(fields, HashMap::new()).unwrap() - } - } - #[test] fn into() { // Demonstrate how to convert back and forth between Schema, SchemaRef, DFSchema, and DFSchemaRef @@ -1424,11 +1232,11 @@ mod tests { ); let arrow_schema_ref = Arc::new(arrow_schema.clone()); - let df_schema = DFSchema::new_with_metadata( - vec![DFField::new_unqualified("c0", DataType::Int64, true)], - metadata, - ) - .unwrap(); + let df_schema = DFSchema { + inner: arrow_schema_ref.clone(), + field_qualifiers: vec![None; arrow_schema_ref.fields.len()], + functional_dependencies: FunctionalDependencies::empty(), + }; let df_schema_ref = Arc::new(df_schema.clone()); { @@ -1468,16 +1276,15 @@ mod tests { b_metadata.insert("key".to_string(), "value".to_string()); let b_field = Field::new("b", DataType::Int64, false).with_metadata(b_metadata); - let a: DFField = DFField::from_qualified("table1", a_field); - let b: DFField = DFField::from_qualified("table1", b_field); + let schema = Arc::new(Schema::new(vec![a_field, b_field])); - let df_schema = Arc::new( - DFSchema::new_with_metadata([a, b].to_vec(), HashMap::new()).unwrap(), - ); - let schema: Schema = df_schema.as_ref().clone().into(); - let a_df = df_schema.fields.first().unwrap().field(); - let a_arrow = schema.fields.first().unwrap(); - assert_eq!(a_df.metadata(), a_arrow.metadata()) + let df_schema = DFSchema { + inner: schema.clone(), + field_qualifiers: vec![None; schema.fields.len()], + functional_dependencies: FunctionalDependencies::empty(), + }; + + assert_eq!(df_schema.inner.metadata(), schema.metadata()) } #[test] diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index cafab6d334b3..234b65392222 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -613,11 +613,7 @@ pub fn field_not_found>( ) -> DataFusionError { schema_datafusion_err!(SchemaError::FieldNotFound { field: Box::new(Column::new(qualifier, name)), - valid_fields: schema - .fields() - .iter() - .map(|f| f.qualified_column()) - .collect(), + valid_fields: schema.columns().to_vec(), }) } @@ -625,11 +621,7 @@ pub fn field_not_found>( pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError { schema_datafusion_err!(SchemaError::FieldNotFound { field: Box::new(Column::new_unqualified(name)), - valid_fields: schema - .fields() - .iter() - .map(|f| 
f.qualified_column()) - .collect(), + valid_fields: schema.columns().to_vec(), }) } diff --git a/datafusion/common/src/functional_dependencies.rs b/datafusion/common/src/functional_dependencies.rs index 1cb1751d713e..2eab0ece6d8b 100644 --- a/datafusion/common/src/functional_dependencies.rs +++ b/datafusion/common/src/functional_dependencies.rs @@ -73,16 +73,14 @@ impl Constraints { is_primary, .. } => { + let field_names = df_schema.field_names(); // Get primary key and/or unique indices in the schema: let indices = columns .iter() .map(|pk| { - let idx = df_schema - .fields() + let idx = field_names .iter() - .position(|item| { - item.qualified_name() == pk.value.clone() - }) + .position(|item| *item == pk.value) .ok_or_else(|| { DataFusionError::Execution( "Primary key doesn't exist".to_string(), @@ -452,7 +450,7 @@ pub fn aggregate_functional_dependencies( aggr_schema: &DFSchema, ) -> FunctionalDependencies { let mut aggregate_func_dependencies = vec![]; - let aggr_input_fields = aggr_input_schema.fields(); + let aggr_input_fields = aggr_input_schema.field_names(); let aggr_fields = aggr_schema.fields(); // Association covers the whole table: let target_indices = (0..aggr_schema.fields().len()).collect::>(); @@ -470,14 +468,14 @@ pub fn aggregate_functional_dependencies( let mut new_source_field_names = vec![]; let source_field_names = source_indices .iter() - .map(|&idx| aggr_input_fields[idx].qualified_name()) + .map(|&idx| &aggr_input_fields[idx]) .collect::>(); for (idx, group_by_expr_name) in group_by_expr_names.iter().enumerate() { // When one of the input determinant expressions matches with // the GROUP BY expression, add the index of the GROUP BY // expression as a new determinant key: - if source_field_names.contains(group_by_expr_name) { + if source_field_names.contains(&group_by_expr_name) { new_source_indices.push(idx); new_source_field_names.push(group_by_expr_name.clone()); } @@ -538,11 +536,7 @@ pub fn get_target_functional_dependencies( ) -> Option> { let mut combined_target_indices = HashSet::new(); let dependencies = schema.functional_dependencies(); - let field_names = schema - .fields() - .iter() - .map(|item| item.qualified_name()) - .collect::>(); + let field_names = schema.field_names(); for FunctionalDependence { source_indices, target_indices, @@ -551,7 +545,7 @@ pub fn get_target_functional_dependencies( { let source_key_names = source_indices .iter() - .map(|id_key_idx| field_names[*id_key_idx].clone()) + .map(|id_key_idx| &field_names[*id_key_idx]) .collect::>(); // If the GROUP BY expression contains a determinant key, we can use // the associated fields after aggregation even if they are not part @@ -577,11 +571,7 @@ pub fn get_required_group_by_exprs_indices( group_by_expr_names: &[String], ) -> Option> { let dependencies = schema.functional_dependencies(); - let field_names = schema - .fields() - .iter() - .map(|item| item.qualified_name()) - .collect::>(); + let field_names = schema.field_names(); let mut groupby_expr_indices = group_by_expr_names .iter() .map(|group_by_expr_name| { diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index da7d6579bfe6..4d2e8b7417fb 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -46,7 +46,9 @@ pub mod utils; /// Reexport arrow crate pub use arrow; pub use column::Column; -pub use dfschema::{DFField, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema}; +pub use dfschema::{ + qualified_name, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema, +}; pub use 
error::{ field_not_found, unqualified_field_not_found, DataFusionError, Result, SchemaError, SharedResult, diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index eea5fc1127ce..1db4f8ede692 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -195,11 +195,15 @@ impl DataFrame { pub fn select_columns(self, columns: &[&str]) -> Result { let fields = columns .iter() - .map(|name| self.plan.schema().field_with_unqualified_name(name)) + .map(|name| { + self.plan + .schema() + .qualified_field_with_unqualified_name(name) + }) .collect::>>()?; let expr: Vec = fields - .iter() - .map(|f| Expr::Column(f.qualified_column())) + .into_iter() + .map(|(qualifier, field)| Expr::Column(Column::from((qualifier, field)))) .collect(); self.select(expr) } @@ -1240,14 +1244,13 @@ impl DataFrame { let mut col_exists = false; let mut fields: Vec = plan .schema() - .fields() .iter() - .map(|f| { - if f.name() == name { + .map(|(qualifier, field)| { + if field.name() == name { col_exists = true; new_column.clone() } else { - col(f.qualified_column()) + col(Column::from((qualifier, field.as_ref()))) } }) .collect(); @@ -1298,24 +1301,25 @@ impl DataFrame { Column::from_qualified_name_ignore_case(old_name) }; - let field_to_rename = match self.plan.schema().field_from_column(&old_column) { - Ok(field) => field, - // no-op if field not found - Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _)) => { - return Ok(self) - } - Err(err) => return Err(err), - }; + let (qualifier_rename, field_rename) = + match self.plan.schema().qualified_field_from_column(&old_column) { + Ok(qualifier_and_field) => qualifier_and_field, + // no-op if field not found + Err(DataFusionError::SchemaError( + SchemaError::FieldNotFound { .. 
}, + _, + )) => return Ok(self), + Err(err) => return Err(err), + }; let projection = self .plan .schema() - .fields() .iter() - .map(|f| { - if f == field_to_rename { - col(f.qualified_column()).alias(new_name) + .map(|(qualifier, field)| { + if qualifier.eq(&qualifier_rename) && field.as_ref() == field_rename { + col(Column::from((qualifier, field.as_ref()))).alias(new_name) } else { - col(f.qualified_column()) + col(Column::from((qualifier, field.as_ref()))) } }) .collect::>(); diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index c53e8df35de8..f97d465c442b 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -31,13 +31,15 @@ use arrow::{ record_batch::RecordBatch, }; use arrow_schema::Fields; -use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{internal_err, Column, DFField, DFSchema, DataFusionError}; use datafusion_expr::execution_props::ExecutionProps; +use futures::stream::FuturesUnordered; +use futures::{stream::BoxStream, StreamExt, TryStreamExt}; +use log::{debug, trace}; + +use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; +use datafusion_common::{internal_err, Column, DFSchema, DataFusionError}; use datafusion_expr::{Expr, ScalarFunctionDefinition, Volatility}; use datafusion_physical_expr::create_physical_expr; -use futures::stream::{BoxStream, FuturesUnordered, StreamExt, TryStreamExt}; -use log::{debug, trace}; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; @@ -267,10 +269,10 @@ async fn prune_partitions( .collect(); let schema = Arc::new(Schema::new(fields)); - let df_schema = DFSchema::new_with_metadata( + let df_schema = DFSchema::from_unqualifed_fields( partition_cols .iter() - .map(|(n, d)| DFField::new_unqualified(n, d.clone(), true)) + .map(|(n, d)| Field::new(n, d.clone(), true)) .collect(), Default::default(), )?; diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 85fb8939886c..d1b7dad15225 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -21,6 +21,7 @@ use std::{any::Any, sync::Arc}; use arrow::datatypes::SchemaRef; use async_trait::async_trait; +use datafusion_common::Column; use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; use crate::{ @@ -126,9 +127,9 @@ impl TableProvider for ViewTable { let fields: Vec = projection .iter() .map(|i| { - Expr::Column( - self.logical_plan.schema().field(*i).qualified_column(), - ) + Expr::Column(Column::from( + self.logical_plan.schema().qualified_field(*i), + )) }) .collect(); plan.project(fields)? 
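The view.rs change above is the call-site pattern this patch applies throughout the codebase: wherever `DFField::qualified_column()` used to appear, a `Column` is now built from a `(qualifier, field)` pair. A sketch of the two equivalent forms taken from the diff; the helper function names here are invented for illustration:

```rust
use datafusion_common::Column;
use datafusion_expr::{Expr, LogicalPlan};

// Hypothetical helper: re-project every column of an existing plan.
fn all_columns(plan: &LogicalPlan) -> Vec<Expr> {
    plan.schema()
        .iter()
        .map(|(qualifier, field)| {
            Expr::Column(Column::from((qualifier, field.as_ref())))
        })
        .collect()
}

// Equivalent shorthand via the new `DFSchema::columns` accessor.
fn all_columns_short(plan: &LogicalPlan) -> Vec<Expr> {
    plan.schema().columns().into_iter().map(Expr::Column).collect()
}
```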
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 0a1730e944d3..4733c1433ad0 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1022,10 +1022,9 @@ impl DefaultPhysicalPlanner { // Remove temporary projected columns let join_plan = if added_project { let final_join_result = join_schema - .fields() .iter() - .map(|field| { - Expr::Column(field.qualified_column()) + .map(|(qualifier, field)| { + Expr::Column(datafusion_common::Column::from((qualifier, field.as_ref()))) }) .collect::>(); let projection = @@ -1089,18 +1088,19 @@ impl DefaultPhysicalPlanner { let (filter_df_fields, filter_fields): (Vec<_>, Vec<_>) = left_field_indices.clone() .into_iter() .map(|i| ( - left_df_schema.field(i).clone(), + left_df_schema.qualified_field(i), physical_left.schema().field(i).clone(), )) .chain( right_field_indices.clone() .into_iter() .map(|i| ( - right_df_schema.field(i).clone(), + right_df_schema.qualified_field(i), physical_right.schema().field(i).clone(), )) ) .unzip(); + let filter_df_fields = filter_df_fields.into_iter().map(|(qualifier, field)| (qualifier.cloned(), Arc::new(field.clone()))).collect(); // Construct intermediate schemas used for filtering data and // convert logical expression to physical according to filter schema @@ -2012,9 +2012,7 @@ mod tests { use arrow::array::{ArrayRef, DictionaryArray, Int32Array}; use arrow::datatypes::{DataType, Field, Int32Type, SchemaRef}; use arrow::record_batch::RecordBatch; - use datafusion_common::{ - assert_contains, DFField, DFSchema, DFSchemaRef, TableReference, - }; + use datafusion_common::{assert_contains, DFSchema, DFSchemaRef, TableReference}; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_expr::{ @@ -2257,25 +2255,23 @@ mod tests { .await; let expected_error: &str = "Error during planning: \ - Extension planner for NoOp created an ExecutionPlan with mismatched schema. \ - LogicalPlan schema: DFSchema { fields: [\ - DFField { qualifier: None, field: Field { \ - name: \"a\", \ + Extension planner for NoOp created an ExecutionPlan with mismatched schema. 
\ + LogicalPlan schema: \ + DFSchema { inner: Schema { fields: \ + [Field { name: \"a\", \ data_type: Int32, \ nullable: false, \ dict_id: 0, \ - dict_is_ordered: false, \ - metadata: {} } }\ - ], metadata: {}, functional_dependencies: FunctionalDependencies { deps: [] } }, \ - ExecutionPlan schema: Schema { fields: [\ - Field { \ - name: \"b\", \ + dict_is_ordered: false, metadata: {} }], \ + metadata: {} }, field_qualifiers: [None], \ + functional_dependencies: FunctionalDependencies { deps: [] } }, \ + ExecutionPlan schema: Schema { fields: \ + [Field { name: \"b\", \ data_type: Int32, \ nullable: false, \ dict_id: 0, \ - dict_is_ordered: false, \ - metadata: {} }\ - ], metadata: {} }"; + dict_is_ordered: false, metadata: {} }], \ + metadata: {} }"; match plan { Ok(_) => panic!("Expected planning failure"), Err(e) => assert!( @@ -2539,8 +2535,8 @@ mod tests { fn default() -> Self { Self { schema: DFSchemaRef::new( - DFSchema::new_with_metadata( - vec![DFField::new_unqualified("a", DataType::Int32, false)], + DFSchema::from_unqualifed_fields( + vec![Field::new("a", DataType::Int32, false)].into(), HashMap::new(), ) .unwrap(), diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 7a227a91c455..60942adb6346 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -212,9 +212,10 @@ pub fn coerce_plan_expr_for_schema( _ => { let exprs: Vec = plan .schema() - .fields() .iter() - .map(|field| Expr::Column(field.qualified_column())) + .map(|(qualifier, field)| { + Expr::Column(Column::from((qualifier, field.as_ref()))) + }) .collect(); let new_exprs = coerce_exprs_for_schema(exprs, plan.schema(), schema)?; @@ -283,10 +284,9 @@ mod test { use super::*; use crate::expr::Sort; use crate::{col, lit, Cast}; - - use arrow::datatypes::DataType; + use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::tree_node::{TreeNode, TreeNodeRewriter}; - use datafusion_common::{DFField, DFSchema, ScalarValue}; + use datafusion_common::{DFSchema, OwnedTableReference, ScalarValue}; #[derive(Default)] struct RecordingRewriter { @@ -347,20 +347,21 @@ mod test { let expr = col("a") + col("b") + col("c"); // Schemas with some matching and some non matching cols - let schema_a = make_schema_with_empty_metadata(vec![ - make_field("tableA", "a"), - make_field("tableA", "aa"), - ]); - let schema_c = make_schema_with_empty_metadata(vec![ - make_field("tableC", "cc"), - make_field("tableC", "c"), - ]); - let schema_b = make_schema_with_empty_metadata(vec![make_field("tableB", "b")]); + let schema_a = make_schema_with_empty_metadata( + vec![Some("tableA".into()), Some("tableA".into())], + vec!["a", "aa"], + ); + let schema_c = make_schema_with_empty_metadata( + vec![Some("tableC".into()), Some("tableC".into())], + vec!["cc", "c"], + ); + let schema_b = + make_schema_with_empty_metadata(vec![Some("tableB".into())], vec!["b"]); // non matching - let schema_f = make_schema_with_empty_metadata(vec![ - make_field("tableC", "f"), - make_field("tableC", "ff"), - ]); + let schema_f = make_schema_with_empty_metadata( + vec![Some("tableC".into()), Some("tableC".into())], + vec!["f", "ff"], + ); let schemas = vec![schema_c, schema_f, schema_b, schema_a]; let schemas = schemas.iter().collect::>(); @@ -378,7 +379,7 @@ mod test { // test normalizing columns when the name doesn't exist let expr = col("a") + col("b"); let schema_a = - make_schema_with_empty_metadata(vec![make_field("\"tableA\"", "a")]); + 
make_schema_with_empty_metadata(vec![Some("\"tableA\"".into())], vec!["a"]); let schemas = [schema_a]; let schemas = schemas.iter().collect::>(); @@ -399,12 +400,16 @@ mod test { assert_eq!(unnormalized_expr, col("a") + col("b")); } - fn make_schema_with_empty_metadata(fields: Vec) -> DFSchema { - DFSchema::new_with_metadata(fields, HashMap::new()).unwrap() - } - - fn make_field(relation: &str, column: &str) -> DFField { - DFField::new(Some(relation.to_string()), column, DataType::Int8, false) + fn make_schema_with_empty_metadata( + qualifiers: Vec>, + fields: Vec<&str>, + ) -> DFSchema { + let fields = fields + .iter() + .map(|f| Arc::new(Field::new(f.to_string(), DataType::Int8, false))) + .collect::>(); + let schema = Arc::new(Schema::new(fields)); + DFSchema::from_field_specific_qualified_schema(qualifiers, &schema).unwrap() } #[test] diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs index b1bc11a83f90..2fb522b979b0 100644 --- a/datafusion/expr/src/expr_rewriter/order_by.rs +++ b/datafusion/expr/src/expr_rewriter/order_by.rs @@ -90,7 +90,7 @@ fn rewrite_in_terms_of_projection( let col = Expr::Column( found .to_field(input.schema()) - .map(|f| f.qualified_column())?, + .map(|(qualifier, field)| Column::new(qualifier, field.name()))?, ); return Ok(Transformed::yes(col)); } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index f1ac22d584ee..de157f3cda75 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -28,8 +28,8 @@ use crate::{utils, LogicalPlan, Projection, Subquery}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field}; use datafusion_common::{ - internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFField, - ExprSchema, Result, + internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, + OwnedTableReference, Result, }; use std::collections::HashMap; use std::sync::Arc; @@ -46,7 +46,10 @@ pub trait ExprSchemable { fn metadata(&self, schema: &dyn ExprSchema) -> Result>; /// convert to a field with respect to a schema - fn to_field(&self, input_schema: &dyn ExprSchema) -> Result; + fn to_field( + &self, + input_schema: &dyn ExprSchema, + ) -> Result<(Option, Arc)>; /// cast to a type with respect to a schema fn cast_to(self, cast_to_type: &DataType, schema: &dyn ExprSchema) -> Result; @@ -70,21 +73,20 @@ impl ExprSchemable for Expr { /// ## and Float32 results in Float32 type /// /// ``` - /// # use arrow::datatypes::DataType; - /// # use datafusion_common::{DFField, DFSchema}; + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::DFSchema; /// # use datafusion_expr::{col, ExprSchemable}; /// # use std::collections::HashMap; /// /// fn main() { /// let expr = col("c1") + col("c2"); - /// let schema = DFSchema::new_with_metadata( + /// let schema = DFSchema::from_unqualifed_fields( /// vec![ - /// DFField::new_unqualified("c1", DataType::Int32, true), - /// DFField::new_unqualified("c2", DataType::Float32, true), - /// ], + /// Field::new("c1", DataType::Int32, true), + /// Field::new("c2", DataType::Float32, true), + /// ].into(), /// HashMap::new(), - /// ) - /// .unwrap(); + /// ).unwrap(); /// assert_eq!("Float32", format!("{}", expr.get_type(&schema).unwrap())); /// } /// ``` @@ -437,26 +439,37 @@ impl ExprSchemable for Expr { /// /// So for example, a projected expression `col(c1) + col(c2)` is /// placed in an output field **named** col("c1 + c2") - fn 
to_field(&self, input_schema: &dyn ExprSchema) -> Result { + fn to_field( + &self, + input_schema: &dyn ExprSchema, + ) -> Result<(Option, Arc)> { match self { Expr::Column(c) => { let (data_type, nullable) = self.data_type_and_nullable(input_schema)?; - Ok( - DFField::new(c.relation.clone(), &c.name, data_type, nullable) - .with_metadata(self.metadata(input_schema)?), - ) + Ok(( + c.relation.clone(), + Field::new(&c.name, data_type, nullable) + .with_metadata(self.metadata(input_schema)?) + .into(), + )) } Expr::Alias(Alias { relation, name, .. }) => { let (data_type, nullable) = self.data_type_and_nullable(input_schema)?; - Ok(DFField::new(relation.clone(), name, data_type, nullable) - .with_metadata(self.metadata(input_schema)?)) + Ok(( + relation.clone(), + Field::new(name, data_type, nullable) + .with_metadata(self.metadata(input_schema)?) + .into(), + )) } _ => { let (data_type, nullable) = self.data_type_and_nullable(input_schema)?; - Ok( - DFField::new_unqualified(&self.display_name()?, data_type, nullable) - .with_metadata(self.metadata(input_schema)?), - ) + Ok(( + None, + Field::new(self.display_name()?, data_type, nullable) + .with_metadata(self.metadata(input_schema)?) + .into(), + )) } } } @@ -535,7 +548,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result { - let cast_expr = Expr::Column(plan.schema().field(0).qualified_column()) + let cast_expr = Expr::Column(Column::from(plan.schema().qualified_field(0))) .cast_to(cast_to_type, subquery.subquery.schema())?; LogicalPlan::Projection(Projection::try_new( vec![cast_expr], @@ -553,8 +566,8 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result {{ @@ -679,23 +692,22 @@ mod tests { .unwrap() ); - let schema = DFSchema::new_with_metadata( - vec![DFField::new_unqualified("foo", DataType::Int32, true) - .with_metadata(meta.clone())], + let schema = DFSchema::from_unqualifed_fields( + vec![Field::new("foo", DataType::Int32, true).with_metadata(meta.clone())] + .into(), HashMap::new(), ) .unwrap(); // verify to_field method populates metadata - assert_eq!(&meta, expr.to_field(&schema).unwrap().metadata()); + assert_eq!(&meta, expr.to_field(&schema).unwrap().1.metadata()); } #[test] fn test_nested_schema_nullability() { - let fields = DFField::new( - Some(TableReference::Bare { - table: "table_name".into(), - }), + let mut builder = SchemaBuilder::new(); + builder.push(Field::new("foo", DataType::Int32, true)); + builder.push(Field::new( "parent", DataType::Struct(Fields::from(vec![Field::new( "child", @@ -703,12 +715,17 @@ mod tests { false, )])), true, - ); + )); + let schema = builder.finish(); - let schema = DFSchema::new_with_metadata(vec![fields], HashMap::new()).unwrap(); + let dfschema = DFSchema::from_field_specific_qualified_schema( + vec![Some("table_name"), None], + &Arc::new(schema), + ) + .unwrap(); let expr = col("parent").field("child"); - assert!(expr.nullable(&schema).unwrap()); + assert!(expr.nullable(&dfschema).unwrap()); } #[derive(Debug)] diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index f47249d76d5b..e1f760c6e3f7 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -47,12 +47,12 @@ use crate::{ TableProviderFilterPushDown, TableSource, WriteOp, }; -use arrow::datatypes::{DataType, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; use datafusion_common::config::FormatOptions; use 
datafusion_common::display::ToStringifiedPlan; use datafusion_common::{ get_target_functional_dependencies, not_impl_err, plan_datafusion_err, plan_err, - Column, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, + Column, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, TableReference, ToDFSchema, UnnestOptions, }; @@ -214,18 +214,14 @@ impl LogicalPlanBuilder { .map(|(j, data_type)| { // naming is following convention https://www.postgresql.org/docs/current/queries-values.html let name = &format!("column{}", j + 1); - DFField::new_unqualified( - name, - data_type.clone().unwrap_or(DataType::Utf8), - true, - ) + Field::new(name, data_type.clone().unwrap_or(DataType::Utf8), true) }) .collect::>(); for (i, j) in nulls { values[i][j] = Expr::Literal(ScalarValue::try_from(fields[j].data_type())?); } - let schema = - DFSchemaRef::new(DFSchema::new_with_metadata(fields, HashMap::new())?); + let dfschema = DFSchema::from_unqualifed_fields(fields.into(), HashMap::new())?; + let schema = DFSchemaRef::new(dfschema); Ok(Self::from(LogicalPlan::Values(Values { schema, values }))) } @@ -368,10 +364,9 @@ impl LogicalPlanBuilder { /// Select the given column indices pub fn select(self, indices: impl IntoIterator) -> Result { - let fields = self.plan.schema().fields(); let exprs: Vec<_> = indices .into_iter() - .map(|x| Expr::Column(fields[x].qualified_column())) + .map(|x| Expr::Column(Column::from(self.plan.schema().qualified_field(x)))) .collect(); self.project(exprs) } @@ -557,11 +552,7 @@ impl LogicalPlanBuilder { } // remove pushed down sort columns - let new_expr = schema - .fields() - .iter() - .map(|f| Expr::Column(f.qualified_column())) - .collect(); + let new_expr = schema.columns().into_iter().map(Expr::Column).collect(); let is_distinct = false; let plan = Self::add_missing_columns(self.plan, &missing_cols, is_distinct)?; @@ -1137,7 +1128,7 @@ impl LogicalPlanBuilder { )?)) } } -pub fn change_redundant_column(fields: Vec) -> Vec { +pub fn change_redundant_column(fields: &Fields) -> Vec { let mut name_map = HashMap::new(); fields .into_iter() @@ -1146,14 +1137,9 @@ pub fn change_redundant_column(fields: Vec) -> Vec { *counter += 1; if *counter > 1 { let new_name = format!("{}:{}", field.name(), *counter - 1); - DFField::new( - field.qualifier().cloned(), - &new_name, - field.data_type().clone(), - field.is_nullable(), - ) + Field::new(new_name, field.data_type().clone(), field.is_nullable()) } else { - field + field.as_ref().clone() } }) .collect() @@ -1165,67 +1151,82 @@ pub fn build_join_schema( right: &DFSchema, join_type: &JoinType, ) -> Result { - fn nullify_fields(fields: &[DFField]) -> Vec { + fn nullify_fields<'a>( + fields: impl Iterator, &'a Arc)>, + ) -> Vec<(Option, Arc)> { fields - .iter() - .map(|f| f.clone().with_nullable(true)) + .map(|(q, f)| { + // TODO: find a good way to do that + let field = f.as_ref().clone().with_nullable(true); + (q.map(|r| r.to_owned_reference()), Arc::new(field)) + }) .collect() } - let right_fields = right.fields(); - let left_fields = left.fields(); + let right_fields = right.iter(); + let left_fields = left.iter(); - let fields: Vec = match join_type { + let qualified_fields: Vec<(Option, Arc)> = match join_type + { JoinType::Inner => { // left then right - left_fields - .iter() - .chain(right_fields.iter()) - .cloned() - .collect() + let left_fields = left_fields + .map(|(q, f)| (q.map(|r| r.to_owned_reference()), f.clone())) + .collect::>(); + let right_fields = right_fields + 
.map(|(q, f)| (q.map(|r| r.to_owned_reference()), f.clone())) + .collect::>(); + left_fields.into_iter().chain(right_fields).collect() } JoinType::Left => { // left then right, right set to nullable in case of not matched scenario + let left_fields = left_fields + .map(|(q, f)| (q.map(|r| r.to_owned_reference()), f.clone())) + .collect::>(); left_fields - .iter() - .chain(&nullify_fields(right_fields)) - .cloned() + .into_iter() + .chain(nullify_fields(right_fields)) .collect() } JoinType::Right => { // left then right, left set to nullable in case of not matched scenario + let right_fields = right_fields + .map(|(q, f)| (q.map(|r| r.to_owned_reference()), f.clone())) + .collect::>(); nullify_fields(left_fields) - .iter() - .chain(right_fields.iter()) - .cloned() + .into_iter() + .chain(right_fields) .collect() } JoinType::Full => { // left then right, all set to nullable in case of not matched scenario nullify_fields(left_fields) - .iter() - .chain(&nullify_fields(right_fields)) - .cloned() + .into_iter() + .chain(nullify_fields(right_fields)) .collect() } JoinType::LeftSemi | JoinType::LeftAnti => { // Only use the left side for the schema - left_fields.clone() + left_fields + .map(|(q, f)| (q.map(|r| r.to_owned_reference()), f.clone())) + .collect() } JoinType::RightSemi | JoinType::RightAnti => { // Only use the right side for the schema - right_fields.clone() + right_fields + .map(|(q, f)| (q.map(|r| r.to_owned_reference()), f.clone())) + .collect() } }; let func_dependencies = left.functional_dependencies().join( right.functional_dependencies(), join_type, - left_fields.len(), + left.fields().len(), ); let mut metadata = left.metadata().clone(); metadata.extend(right.metadata().clone()); - let schema = DFSchema::new_with_metadata(fields, metadata)?; - schema.with_functional_dependencies(func_dependencies) + let dfschema = DFSchema::new_with_metadata(qualified_fields, metadata)?; + dfschema.with_functional_dependencies(func_dependencies) } /// Add additional "synthetic" group by expressions based on functional @@ -1252,9 +1253,7 @@ fn add_group_by_exprs_from_dependencies( get_target_functional_dependencies(schema, &group_by_field_names) { for idx in target_indices { - let field = schema.field(idx); - let expr = - Expr::Column(Column::new(field.qualifier().cloned(), field.name())); + let expr = Expr::Column(Column::from(schema.qualified_field(idx))); let expr_name = expr.display_name()?; if !group_by_field_names.contains(&expr_name) { group_by_field_names.push(expr_name); @@ -1325,33 +1324,33 @@ pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result>>()? - .to_dfschema()?; + .collect::>>()?; + let union_schema = + DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())?; let inputs = vec![left_plan, right_plan] .into_iter() @@ -1551,18 +1550,18 @@ pub fn unnest_with_options( column: Column, options: UnnestOptions, ) -> Result { - let unnest_field = input.schema().field_from_column(&column)?; + let (unnest_qualifier, unnest_field) = + input.schema().qualified_field_from_column(&column)?; // Extract the type of the nested field in the list. let unnested_field = match unnest_field.data_type() { DataType::List(field) | DataType::FixedSizeList(field, _) - | DataType::LargeList(field) => DFField::new( - unnest_field.qualifier().cloned(), + | DataType::LargeList(field) => Arc::new(Field::new( unnest_field.name(), field.data_type().clone(), unnest_field.is_nullable(), - ), + )), _ => { // If the unnest field is not a list type return the input plan. 
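             // (e.g. attempting to unnest an Int32 column is a no-op:
             // the input plan is returned unchanged)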
return Ok(input); @@ -1572,13 +1571,12 @@ pub fn unnest_with_options( // Update the schema with the unnest column type changed to contain the nested type. let input_schema = input.schema(); let fields = input_schema - .fields() .iter() - .map(|f| { - if f == unnest_field { - unnested_field.clone() + .map(|(q, f)| { + if f.as_ref() == unnest_field && q == unnest_qualifier { + (unnest_qualifier.cloned(), unnested_field.clone()) } else { - f.clone() + (q.cloned(), f.clone()) } }) .collect::>(); @@ -1588,10 +1586,11 @@ pub fn unnest_with_options( // We can use the existing functional dependencies: let deps = input_schema.functional_dependencies().clone(); let schema = Arc::new(df_schema.with_functional_dependencies(deps)?); + let column = Column::from((unnest_qualifier, unnested_field.as_ref())); Ok(LogicalPlan::Unnest(Unnest { input: Arc::new(input), - column: unnested_field.qualified_column(), + column, schema, options, })) @@ -2113,23 +2112,23 @@ mod tests { } #[test] fn test_change_redundant_column() -> Result<()> { - let t1_field_1 = DFField::new_unqualified("a", DataType::Int32, false); - let t2_field_1 = DFField::new_unqualified("a", DataType::Int32, false); - let t2_field_3 = DFField::new_unqualified("a", DataType::Int32, false); - let t1_field_2 = DFField::new_unqualified("b", DataType::Int32, false); - let t2_field_2 = DFField::new_unqualified("b", DataType::Int32, false); + let t1_field_1 = Field::new("a", DataType::Int32, false); + let t2_field_1 = Field::new("a", DataType::Int32, false); + let t2_field_3 = Field::new("a", DataType::Int32, false); + let t1_field_2 = Field::new("b", DataType::Int32, false); + let t2_field_2 = Field::new("b", DataType::Int32, false); let field_vec = vec![t1_field_1, t2_field_1, t1_field_2, t2_field_2, t2_field_3]; - let remove_redundant = change_redundant_column(field_vec); + let remove_redundant = change_redundant_column(&Fields::from(field_vec)); assert_eq!( remove_redundant, vec![ - DFField::new_unqualified("a", DataType::Int32, false), - DFField::new_unqualified("a:1", DataType::Int32, false), - DFField::new_unqualified("b", DataType::Int32, false), - DFField::new_unqualified("b:1", DataType::Int32, false), - DFField::new_unqualified("a:2", DataType::Int32, false), + Field::new("a", DataType::Int32, false), + Field::new("a:1", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("b:1", DataType::Int32, false), + Field::new("a:2", DataType::Int32, false), ] ); Ok(()) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 0bf5b8dffaa2..7a28b085cbc0 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -49,7 +49,7 @@ use datafusion_common::tree_node::{ }; use datafusion_common::{ aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints, - DFField, DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence, + DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence, FunctionalDependencies, OwnedTableReference, ParamValues, Result, UnnestOptions, }; @@ -487,12 +487,12 @@ impl LogicalPlan { LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. 
}) => { static_term.head_output_expr() } - LogicalPlan::Union(union) => Ok(Some(Expr::Column( - union.schema.fields()[0].qualified_column(), - ))), - LogicalPlan::TableScan(table) => Ok(Some(Expr::Column( - table.projected_schema.fields()[0].qualified_column(), - ))), + LogicalPlan::Union(union) => Ok(Some(Expr::Column(Column::from( + union.schema.qualified_field(0), + )))), + LogicalPlan::TableScan(table) => Ok(Some(Expr::Column(Column::from( + table.projected_schema.qualified_field(0), + )))), LogicalPlan::SubqueryAlias(subquery_alias) => { let expr_opt = subquery_alias.input.head_output_expr()?; expr_opt @@ -867,24 +867,30 @@ impl LogicalPlan { }) => { // Update schema with unnested column type. let input = Arc::new(inputs.swap_remove(0)); - let nested_field = input.schema().field_from_column(column)?; - let unnested_field = schema.field_from_column(column)?; - let fields = input + let (nested_qualifier, nested_field) = + input.schema().qualified_field_from_column(column)?; + let (unnested_qualifier, unnested_field) = + schema.qualified_field_from_column(column)?; + let qualifiers_and_fields = input .schema() - .fields() .iter() - .map(|f| { - if f == nested_field { - unnested_field.clone() + .map(|(qualifier, field)| { + if qualifier.eq(&nested_qualifier) + && field.as_ref() == nested_field + { + ( + unnested_qualifier.cloned(), + Arc::new(unnested_field.clone()), + ) } else { - f.clone() + (qualifier.cloned(), field.clone()) } }) .collect::>(); let schema = Arc::new( DFSchema::new_with_metadata( - fields, + qualifiers_and_fields, input.schema().metadata().clone(), )? // We can use the existing functional dependencies as is: @@ -1803,12 +1809,7 @@ impl Projection { /// Create a new Projection using the specified output schema pub fn new_from_schema(input: Arc, schema: DFSchemaRef) -> Self { - let expr: Vec = schema - .fields() - .iter() - .map(|field| field.qualified_column()) - .map(Expr::Column) - .collect(); + let expr: Vec = schema.columns().into_iter().map(Expr::Column).collect(); Self { expr, input, @@ -1860,9 +1861,10 @@ impl SubqueryAlias { alias: impl Into, ) -> Result { let alias = alias.into(); - let fields = change_redundant_column(plan.schema().fields().clone()); + let fields = change_redundant_column(plan.schema().fields()); let meta_data = plan.schema().as_ref().metadata().clone(); - let schema: Schema = DFSchema::new_with_metadata(fields, meta_data)?.into(); + let schema: Schema = + DFSchema::from_unqualifed_fields(fields.into(), meta_data)?.into(); // Since schema is the same, other than qualifier, we can use existing // functional dependencies: let func_dependencies = plan.schema().functional_dependencies().clone(); @@ -2007,9 +2009,13 @@ pub struct Window { impl Window { /// Create a new window operator. 
pub fn try_new(window_expr: Vec, input: Arc) -> Result { - let fields = input.schema().fields(); + let fields: Vec<(Option, Arc)> = input + .schema() + .iter() + .map(|(q, f)| (q.cloned(), f.clone())) + .collect(); let input_len = fields.len(); - let mut window_fields = fields.clone(); + let mut window_fields = fields; let expr_fields = exprlist_to_fields(window_expr.as_slice(), &input)?; window_fields.extend_from_slice(expr_fields.as_slice()); let metadata = input.schema().metadata().clone(); @@ -2134,16 +2140,14 @@ impl TableScan { .map(|p| { let projected_func_dependencies = func_dependencies.project_functional_dependencies(p, p.len()); + let df_schema = DFSchema::new_with_metadata( p.iter() .map(|i| { - DFField::from_qualified( - table_name.clone(), - schema.field(*i).clone(), - ) + (Some(table_name.clone()), Arc::new(schema.field(*i).clone())) }) .collect(), - schema.metadata().clone(), + schema.metadata.clone(), )?; df_schema.with_functional_dependencies(projected_func_dependencies) }) @@ -2335,9 +2339,12 @@ impl DistinctOn { } let on_expr = normalize_cols(on_expr, input.as_ref())?; + let qualified_fields = exprlist_to_fields(select_expr.as_slice(), &input)? + .into_iter() + .collect(); - let schema = DFSchema::new_with_metadata( - exprlist_to_fields(select_expr.as_slice(), &input)?, + let dfschema = DFSchema::new_with_metadata( + qualified_fields, input.schema().metadata().clone(), )?; @@ -2346,7 +2353,7 @@ impl DistinctOn { select_expr, sort_expr: None, input, - schema: Arc::new(schema), + schema: Arc::new(dfschema), }; if let Some(sort_expr) = sort_expr { @@ -2416,20 +2423,19 @@ impl Aggregate { let grouping_expr: Vec = grouping_set_to_exprlist(group_expr.as_slice())?; - let mut fields = exprlist_to_fields(grouping_expr.as_slice(), &input)?; + let mut qualified_fields = exprlist_to_fields(grouping_expr.as_slice(), &input)?; // Even columns that cannot be null will become nullable when used in a grouping set. if is_grouping_set { - fields = fields + qualified_fields = qualified_fields .into_iter() - .map(|field| field.with_nullable(true)) + .map(|(q, f)| (q, f.as_ref().clone().with_nullable(true).into())) .collect::>(); } - fields.extend(exprlist_to_fields(aggr_expr.as_slice(), &input)?); + qualified_fields.extend(exprlist_to_fields(aggr_expr.as_slice(), &input)?); - let schema = - DFSchema::new_with_metadata(fields, input.schema().metadata().clone())?; + let schema = DFSchema::new_with_metadata(qualified_fields, HashMap::new())?; Self::try_new_with_schema(input, group_expr, aggr_expr, Arc::new(schema)) } @@ -2524,7 +2530,7 @@ fn calc_func_dependencies_for_project( exprs: &[Expr], input: &LogicalPlan, ) -> Result { - let input_fields = input.schema().fields(); + let input_fields = input.schema().field_names(); // Calculate expression indices (if present) in the input schema. 
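// `field_names()` (used just above) returns the fully qualified name strings --
// e.g. "test.a" for a field `a` qualified by table `test`, plain "a" for an
// unqualified one -- so expression positions are resolved by string comparison
// below. A sketch with an illustrative schema:
//
//     let names = input.schema().field_names(); // e.g. ["test.a", "test.b"]
//     let idx = names.iter().position(|n| *n == "test.a"); // Some(0)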
let proj_indices = exprs .iter() @@ -2535,9 +2541,7 @@ fn calc_func_dependencies_for_project( } _ => format!("{}", expr), }; - input_fields - .iter() - .position(|item| item.qualified_name() == expr_name) + input_fields.iter().position(|item| *item == expr_name) }) .collect::>(); Ok(input @@ -2673,7 +2677,6 @@ pub struct Unnest { #[cfg(test)] mod tests { - use std::collections::HashMap; use std::sync::Arc; use super::*; @@ -3084,7 +3087,7 @@ digraph { #[test] fn projection_expr_schema_mismatch() -> Result<()> { - let empty_schema = Arc::new(DFSchema::new_with_metadata(vec![], HashMap::new())?); + let empty_schema = Arc::new(DFSchema::empty()); let p = Projection::try_new_with_schema( vec![col("a")], Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { @@ -3258,10 +3261,10 @@ digraph { filters: vec![], fetch: None, })); - let col = schema.field(0).qualified_column(); + let col = schema.field_names()[0].clone(); let filter = Filter::try_new( - Expr::Column(col).eq(Expr::Literal(ScalarValue::Int32(Some(1)))), + Expr::Column(col.into()).eq(Expr::Literal(ScalarValue::Int32(Some(1)))), scan, ) .unwrap(); @@ -3288,10 +3291,10 @@ digraph { filters: vec![], fetch: None, })); - let col = schema.field(0).qualified_column(); + let col = schema.field_names()[0].clone(); let filter = Filter::try_new( - Expr::Column(col).eq(Expr::Literal(ScalarValue::Int32(Some(1)))), + Expr::Column(col.into()).eq(Expr::Literal(ScalarValue::Int32(Some(1)))), scan, ) .unwrap(); diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index c7907d0db16a..0edcfa4bb522 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -30,12 +30,12 @@ use crate::{ Operator, TryCast, }; -use arrow::datatypes::{DataType, TimeUnit}; +use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_common::utils::get_at_indices; use datafusion_common::{ - internal_err, plan_datafusion_err, plan_err, Column, DFField, DFSchema, DFSchemaRef, - Result, ScalarValue, TableReference, + internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, + OwnedTableReference, Result, ScalarValue, TableReference, }; use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem, WildcardAdditionalOptions}; @@ -342,12 +342,9 @@ fn get_excluded_columns( let mut result = vec![]; for ident in unique_idents.into_iter() { let col_name = ident.value.as_str(); - let field = if let Some(qualifier) = qualifier { - schema.field_with_qualified_name(qualifier, col_name)? - } else { - schema.field_with_unqualified_name(col_name)? 
- }; - result.push(field.qualified_column()) + let (qualifier, field) = + schema.qualified_field_with_name(qualifier.as_ref(), col_name)?; + result.push(Column::from((qualifier, field))); } Ok(result) } @@ -359,18 +356,18 @@ fn get_exprs_except_skipped( ) -> Vec { if columns_to_skip.is_empty() { schema - .fields() .iter() - .map(|f| Expr::Column(f.qualified_column())) + .map(|(qualifier, field)| { + Expr::Column(Column::from((qualifier, field.as_ref()))) + }) .collect::>() } else { schema - .fields() + .columns() .iter() - .filter_map(|f| { - let col = f.qualified_column(); - if !columns_to_skip.contains(&col) { - Some(Expr::Column(col)) + .filter_map(|c| { + if !columns_to_skip.contains(c) { + Some(Expr::Column(c.clone())) } else { None } @@ -433,13 +430,14 @@ pub fn expand_qualified_wildcard( let projected_func_dependencies = schema .functional_dependencies() .project_functional_dependencies(&qualified_indices, qualified_indices.len()); - let qualified_fields = get_at_indices(schema.fields(), &qualified_indices)?; - if qualified_fields.is_empty() { + let fields_with_qualified = get_at_indices(schema.fields(), &qualified_indices)?; + if fields_with_qualified.is_empty() { return plan_err!("Invalid qualifier {qualifier}"); } - let qualified_schema = - DFSchema::new_with_metadata(qualified_fields, schema.metadata().clone())? - // We can use the functional dependencies as is, since it only stores indices: + + let qualified_schema = Arc::new(Schema::new(fields_with_qualified)); + let qualified_dfschema = + DFSchema::try_from_qualified_schema(qualifier.clone(), &qualified_schema)? .with_functional_dependencies(projected_func_dependencies)?; let excluded_columns = if let Some(WildcardAdditionalOptions { opt_exclude, @@ -459,7 +457,10 @@ pub fn expand_qualified_wildcard( // Add each excluded `Column` to columns_to_skip let mut columns_to_skip = HashSet::new(); columns_to_skip.extend(excluded_columns); - Ok(get_exprs_except_skipped(&qualified_schema, columns_to_skip)) + Ok(get_exprs_except_skipped( + &qualified_dfschema, + columns_to_skip, + )) } /// (expr, "is the SortExpr for window (either comes from PARTITION BY or ORDER BY columns)") @@ -737,7 +738,10 @@ fn agg_cols(agg: &Aggregate) -> Vec { .collect() } -fn exprlist_to_fields_aggregate(exprs: &[Expr], agg: &Aggregate) -> Result> { +fn exprlist_to_fields_aggregate( + exprs: &[Expr], + agg: &Aggregate, +) -> Result, Arc)>> { let agg_cols = agg_cols(agg); let mut fields = vec![]; for expr in exprs { @@ -753,7 +757,10 @@ fn exprlist_to_fields_aggregate(exprs: &[Expr], agg: &Aggregate) -> Result Result> { +pub fn exprlist_to_fields( + exprs: &[Expr], + plan: &LogicalPlan, +) -> Result, Arc)>> { // when dealing with aggregate plans we cannot simply look in the aggregate output schema // because it will contain columns representing complex expressions (such a column named // `GROUPING(person.state)` so in order to resolve `person.state` in this case we need to @@ -805,11 +812,15 @@ pub fn columnize_expr(e: Expr, input_schema: &DFSchema) -> Expr { )), Expr::ScalarSubquery(_) => e.clone(), _ => match e.display_name() { - Ok(name) => match input_schema.field_with_unqualified_name(&name) { - Ok(field) => Expr::Column(field.qualified_column()), - // expression not provided as input, do not convert to a column reference - Err(_) => e, - }, + Ok(name) => { + match input_schema.qualified_field_with_unqualified_name(&name) { + Ok((qualifier, field)) => { + Expr::Column(Column::from((qualifier, field))) + } + // expression not provided as input, do not 
convert to a column reference + Err(_) => e, + } + } Err(_) => e, }, } @@ -842,8 +853,8 @@ pub(crate) fn find_columns_referenced_by_expr(e: &Expr) -> Vec { pub fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result { match expr { Expr::Column(col) => { - let field = plan.schema().field_from_column(col)?; - Ok(Expr::Column(field.qualified_column())) + let (qualifier, field) = plan.schema().qualified_field_from_column(col)?; + Ok(Expr::Column(Column::from((qualifier, field)))) } _ => Ok(Expr::Column(Column::from_name(expr.display_name()?))), } diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index b21ec851dfcd..88202ffd21f1 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -23,7 +23,7 @@ use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::Result; +use datafusion_common::{Column, Result}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::{ logical_plan::LogicalPlan, Expr, Filter, LogicalPlanBuilder, TableScan, @@ -119,9 +119,9 @@ fn generate_projection_expr( let mut exprs = vec![]; if let Some(projection) = projection { for i in projection { - exprs.push(Expr::Column( - sub_plan.schema().fields()[*i].qualified_column(), - )); + exprs.push(Expr::Column(Column::from( + sub_plan.schema().qualified_field(*i), + ))); } } else { exprs.push(Expr::Wildcard { qualifier: None }); diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index c76c1c8a7bd0..b7b7c4f20e4a 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -756,9 +756,9 @@ mod test { }; use crate::test::assert_analyzed_plan_eq; - use arrow::datatypes::{DataType, TimeUnit}; + use arrow::datatypes::{DataType, Field, TimeUnit}; use datafusion_common::tree_node::{TransformedResult, TreeNode}; - use datafusion_common::{DFField, DFSchema, DFSchemaRef, Result, ScalarValue}; + use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::expr::{self, InSubquery, Like, ScalarFunction}; use datafusion_expr::logical_plan::{EmptyRelation, Projection}; use datafusion_expr::{ @@ -781,8 +781,8 @@ mod test { Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, schema: Arc::new( - DFSchema::new_with_metadata( - vec![DFField::new_unqualified("a", data_type, true)], + DFSchema::from_unqualifed_fields( + vec![Field::new("a", data_type, true)].into(), std::collections::HashMap::new(), ) .unwrap(), @@ -1042,12 +1042,8 @@ mod test { let expr = col("a").in_list(vec![lit(1_i32), lit(4_i8), lit(8_i64)], false); let empty = Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: Arc::new(DFSchema::new_with_metadata( - vec![DFField::new_unqualified( - "a", - DataType::Decimal128(12, 4), - true, - )], + schema: Arc::new(DFSchema::from_unqualifed_fields( + vec![Field::new("a", DataType::Decimal128(12, 4), true)].into(), std::collections::HashMap::new(), )?), })); @@ -1251,8 +1247,8 @@ mod test { #[test] fn test_type_coercion_rewrite() -> Result<()> { // gt - let schema = Arc::new(DFSchema::new_with_metadata( - vec![DFField::new_unqualified("a", DataType::Int64, true)], + let schema = Arc::new(DFSchema::from_unqualifed_fields( + 
vec![Field::new("a", DataType::Int64, true)].into(), std::collections::HashMap::new(), )?); let mut rewriter = TypeCoercionRewriter { schema }; @@ -1262,8 +1258,8 @@ mod test { assert_eq!(expected, result); // eq - let schema = Arc::new(DFSchema::new_with_metadata( - vec![DFField::new_unqualified("a", DataType::Int64, true)], + let schema = Arc::new(DFSchema::from_unqualifed_fields( + vec![Field::new("a", DataType::Int64, true)].into(), std::collections::HashMap::new(), )?); let mut rewriter = TypeCoercionRewriter { schema }; @@ -1273,8 +1269,8 @@ mod test { assert_eq!(expected, result); // lt - let schema = Arc::new(DFSchema::new_with_metadata( - vec![DFField::new_unqualified("a", DataType::Int64, true)], + let schema = Arc::new(DFSchema::from_unqualifed_fields( + vec![Field::new("a", DataType::Int64, true)].into(), std::collections::HashMap::new(), )?); let mut rewriter = TypeCoercionRewriter { schema }; @@ -1346,26 +1342,27 @@ mod test { #[test] fn test_case_expression_coercion() -> Result<()> { - let schema = Arc::new(DFSchema::new_with_metadata( + let schema = Arc::new(DFSchema::from_unqualifed_fields( vec![ - DFField::new_unqualified("boolean", DataType::Boolean, true), - DFField::new_unqualified("integer", DataType::Int32, true), - DFField::new_unqualified("float", DataType::Float32, true), - DFField::new_unqualified( + Field::new("boolean", DataType::Boolean, true), + Field::new("integer", DataType::Int32, true), + Field::new("float", DataType::Float32, true), + Field::new( "timestamp", DataType::Timestamp(TimeUnit::Nanosecond, None), true, ), - DFField::new_unqualified("date", DataType::Date32, true), - DFField::new_unqualified( + Field::new("date", DataType::Date32, true), + Field::new( "interval", DataType::Interval(arrow::datatypes::IntervalUnit::MonthDayNano), true, ), - DFField::new_unqualified("binary", DataType::Binary, true), - DFField::new_unqualified("string", DataType::Utf8, true), - DFField::new_unqualified("decimal", DataType::Decimal128(10, 10), true), - ], + Field::new("binary", DataType::Binary, true), + Field::new("string", DataType::Utf8, true), + Field::new("decimal", DataType::Decimal128(10, 10), true), + ] + .into(), std::collections::HashMap::new(), )?); diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 25c25c63f0b7..77613aa66293 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -24,13 +24,13 @@ use std::sync::Arc; use crate::utils::is_volatile_expression; use crate::{utils, OptimizerConfig, OptimizerRule}; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, Field}; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor, }; use datafusion_common::{ - internal_err, Column, DFField, DFSchema, DFSchemaRef, DataFusionError, Result, + internal_err, qualified_name, Column, DFSchema, DFSchemaRef, DataFusionError, Result, }; use datafusion_expr::expr::Alias; use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection, Window}; @@ -331,8 +331,10 @@ impl CommonSubexprEliminate { proj_exprs.push(Expr::Column(Column::from_name(name))); } else { let id = ExprSet::expr_identifier(&expr_rewritten); - let out_name = - expr_rewritten.to_field(&new_input_schema)?.qualified_name(); + let (qualifier, field) = + expr_rewritten.to_field(&new_input_schema)?; + let out_name = qualified_name(qualifier.as_ref(), field.name()); + 
agg_exprs.push(expr_rewritten.alias(&id)); proj_exprs .push(Expr::Column(Column::from_name(id)).alias(out_name)); @@ -469,7 +471,7 @@ fn build_common_expr_project_plan( match expr_set.get(&id) { Some((expr, _, data_type, symbol)) => { // todo: check `nullable` - let field = DFField::new_unqualified(&id, data_type.clone(), true); + let field = Field::new(&id, data_type.clone(), true); fields_set.insert(field.name().to_owned()); project_exprs.push(expr.clone().alias(symbol.as_str())); } @@ -479,9 +481,9 @@ fn build_common_expr_project_plan( } } - for field in input.schema().fields() { - if fields_set.insert(field.qualified_name()) { - project_exprs.push(Expr::Column(field.qualified_column())); + for (qualifier, field) in input.schema().iter() { + if fields_set.insert(qualified_name(qualifier, field.name())) { + project_exprs.push(Expr::Column(Column::from((qualifier, field.as_ref())))); } } @@ -500,9 +502,8 @@ fn build_recover_project_plan( input: LogicalPlan, ) -> Result { let col_exprs = schema - .fields() .iter() - .map(|field| Expr::Column(field.qualified_column())) + .map(|(qualifier, field)| Expr::Column(Column::from((qualifier, field.as_ref())))) .collect(); Ok(LogicalPlan::Projection(Projection::try_new( col_exprs, @@ -517,10 +518,14 @@ fn extract_expressions( ) -> Result<()> { if let Expr::GroupingSet(groupings) = expr { for e in groupings.distinct_expr() { - result.push(Expr::Column(e.to_field(schema)?.qualified_column())) + let (qualifier, field) = e.to_field(schema)?; + let col = Column::new(qualifier, field.name()); + result.push(Expr::Column(col)) } } else { - result.push(Expr::Column(expr.to_field(schema)?.qualified_column())); + let (qualifier, field) = expr.to_field(schema)?; + let col = Column::new(qualifier, field.name()); + result.push(Expr::Column(col)); } Ok(()) @@ -1037,8 +1042,8 @@ mod test { build_common_expr_project_plan(project, affected_id, &expr_set_2).unwrap(); let mut field_set = BTreeSet::new(); - for field in project_2.schema().fields() { - assert!(field_set.insert(field.qualified_name())); + for name in project_2.schema().field_names() { + assert!(field_set.insert(name)); } } @@ -1104,8 +1109,8 @@ mod test { build_common_expr_project_plan(project, affected_id, &expr_set_2).unwrap(); let mut field_set = BTreeSet::new(); - for field in project_2.schema().fields() { - assert!(field_set.insert(field.qualified_name())); + for name in project_2.schema().field_names() { + assert!(field_set.insert(name)); } } @@ -1181,12 +1186,13 @@ mod test { fn test_extract_expressions_from_grouping_set() -> Result<()> { let mut result = Vec::with_capacity(3); let grouping = grouping_set(vec![vec![col("a"), col("b")], vec![col("c")]]); - let schema = DFSchema::new_with_metadata( + let schema = DFSchema::from_unqualifed_fields( vec![ - DFField::new_unqualified("a", DataType::Int32, false), - DFField::new_unqualified("b", DataType::Int32, false), - DFField::new_unqualified("c", DataType::Int32, false), - ], + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ] + .into(), HashMap::default(), )?; extract_expressions(&grouping, &schema, &mut result)?; @@ -1199,11 +1205,12 @@ mod test { fn test_extract_expressions_from_grouping_set_with_identical_expr() -> Result<()> { let mut result = Vec::with_capacity(2); let grouping = grouping_set(vec![vec![col("a"), col("b")], vec![col("a")]]); - let schema = DFSchema::new_with_metadata( + let schema = DFSchema::from_unqualifed_fields( vec![ - 
DFField::new_unqualified("a", DataType::Int32, false), - DFField::new_unqualified("b", DataType::Int32, false), - ], + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + ] + .into(), HashMap::default(), )?; extract_expressions(&grouping, &schema, &mut result)?; @@ -1215,8 +1222,8 @@ mod test { #[test] fn test_extract_expressions_from_col() -> Result<()> { let mut result = Vec::with_capacity(1); - let schema = DFSchema::new_with_metadata( - vec![DFField::new_unqualified("a", DataType::Int32, false)], + let schema = DFSchema::from_unqualifed_fields( + vec![Field::new("a", DataType::Int32, false)].into(), HashMap::default(), )?; extract_expressions(&col("a"), &schema, &mut result)?; diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs index b942f187c331..c40a9bb704eb 100644 --- a/datafusion/optimizer/src/optimize_projections.rs +++ b/datafusion/optimizer/src/optimize_projections.rs @@ -665,10 +665,9 @@ fn outer_columns_helper_multi<'a>( /// /// A vector of `Expr::Column` expressions residing at `indices` of the `input_schema`. fn get_required_exprs(input_schema: &Arc, indices: &[usize]) -> Vec { - let fields = input_schema.fields(); indices .iter() - .map(|&idx| Expr::Column(fields[idx].qualified_column())) + .map(|&idx| Expr::Column(Column::from(input_schema.qualified_field(idx)))) .collect() } diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index fe63766fc265..3153f72d7ee7 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -467,7 +467,7 @@ mod tests { use crate::test::test_table_scan; use crate::{OptimizerConfig, OptimizerContext, OptimizerRule}; - use datafusion_common::{plan_err, DFField, DFSchema, DFSchemaRef, Result}; + use datafusion_common::{plan_err, DFSchema, DFSchemaRef, Result}; use datafusion_expr::logical_plan::EmptyRelation; use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder, Projection}; @@ -509,14 +509,18 @@ mod tests { let err = opt.optimize(&plan, &config, &observe).unwrap_err(); assert_eq!( "Optimizer rule 'get table_scan rule' failed\ncaused by\nget table_scan rule\ncaused by\n\ - Internal error: Failed due to a difference in schemas, \ - original schema: DFSchema { fields: [\ - DFField { qualifier: Some(Bare { table: \"test\" }), field: Field { name: \"a\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, \ - DFField { qualifier: Some(Bare { table: \"test\" }), field: Field { name: \"b\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, \ - DFField { qualifier: Some(Bare { table: \"test\" }), field: Field { name: \"c\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }], \ - metadata: {}, functional_dependencies: FunctionalDependencies { deps: [] } }, \ - new schema: DFSchema { fields: [], metadata: {}, functional_dependencies: FunctionalDependencies { deps: [] } }.\ - \nThis was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker", + Internal error: Failed due to a difference in schemas, original schema: \ + DFSchema { inner: Schema { fields: \ + [Field { name: \"a\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \ + Field { name: \"b\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \ + Field { name: \"c\", 
data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, \ + field_qualifiers: [Some(Bare { table: \"test\" }), Some(Bare { table: \"test\" }), Some(Bare { table: \"test\" })], \ + functional_dependencies: FunctionalDependencies { deps: [] } }, \ + new schema: DFSchema { inner: Schema { \ + fields: [], metadata: {} }, \ + field_qualifiers: [], \ + functional_dependencies: FunctionalDependencies { deps: [] } }.\n\ + This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker", err.strip_backtrace() ); } @@ -610,19 +614,14 @@ mod tests { fn add_metadata_to_fields(schema: &DFSchema) -> DFSchemaRef { let new_fields = schema - .fields() .iter() .enumerate() - .map(|(i, f)| { + .map(|(i, (qualifier, field))| { let metadata = [("key".into(), format!("value {i}"))].into_iter().collect(); - let new_arrow_field = f.field().as_ref().clone().with_metadata(metadata); - if let Some(qualifier) = f.qualifier() { - DFField::from_qualified(qualifier.clone(), new_arrow_field) - } else { - DFField::from(new_arrow_field) - } + let new_arrow_field = field.as_ref().clone().with_metadata(metadata); + (qualifier.cloned(), Arc::new(new_arrow_field)) }) .collect::>(); diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs b/datafusion/optimizer/src/propagate_empty_relation.rs index d1f9f87a32a3..55fb982d2a87 100644 --- a/datafusion/optimizer/src/propagate_empty_relation.rs +++ b/datafusion/optimizer/src/propagate_empty_relation.rs @@ -188,7 +188,7 @@ mod tests { test_table_scan_fields, test_table_scan_with_name, }; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::{Column, DFField, DFSchema, ScalarValue}; + use datafusion_common::{Column, DFSchema, ScalarValue}; use datafusion_expr::logical_plan::table_scan; use datafusion_expr::{ binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, Expr, JoinType, @@ -373,14 +373,14 @@ mod tests { fn test_empty_with_non_empty() -> Result<()> { let table_scan = test_table_scan()?; - let fields = test_table_scan_fields() - .into_iter() - .map(DFField::from) - .collect(); + let fields = test_table_scan_fields(); let empty = LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: Arc::new(DFSchema::new_with_metadata(fields, Default::default())?), + schema: Arc::new(DFSchema::from_unqualifed_fields( + fields.into(), + Default::default(), + )?), }); let one = LogicalPlanBuilder::from(empty.clone()).build()?; diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index e93e171e0324..83db4b0640a4 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -26,8 +26,8 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; use datafusion_common::{ - internal_err, plan_datafusion_err, Column, DFSchema, DFSchemaRef, JoinConstraint, - Result, + internal_err, plan_datafusion_err, qualified_name, Column, DFSchema, DFSchemaRef, + JoinConstraint, Result, }; use datafusion_expr::expr::Alias; use datafusion_expr::expr_rewriter::replace_col; @@ -198,13 +198,12 @@ fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> { // relevant columns are contained on the relevant join side's schema. 
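// Note on the next two rewrites: every schema entry now contributes two `Column`
// candidates -- one qualified, one unqualified -- so a join predicate written
// either as `test.a > 5` or as `a > 5` passes the pushdown check. In sketch
// form, for an illustrative field `a` qualified by `test`:
//
//     let qualified = Column::new(Some("test"), "a"); // matches `test.a`
//     let unqualified = Column::new_unqualified("a"); // matches bare `a`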
fn can_pushdown_join_predicate(predicate: &Expr, schema: &DFSchema) -> Result { let schema_columns = schema - .fields() .iter() - .flat_map(|f| { + .flat_map(|(qualifier, field)| { [ - f.qualified_column(), + Column::new(qualifier.cloned(), field.name()), // we need to push down filter using unqualified column as well - f.unqualified_column(), + Column::new_unqualified(field.name()), ] }) .collect::>(); @@ -305,13 +304,12 @@ fn extract_or_clauses_for_join<'a>( schema: &'a DFSchema, ) -> impl Iterator + 'a { let schema_columns = schema - .fields() .iter() - .flat_map(|f| { + .flat_map(|(qualifier, field)| { [ - f.qualified_column(), + Column::new(qualifier.cloned(), field.name()), // we need to push down filter using unqualified column as well - f.unqualified_column(), + Column::new_unqualified(field.name()), ] }) .collect::>(); @@ -672,17 +670,14 @@ impl OptimizerRule for PushDownFilter { } LogicalPlan::SubqueryAlias(subquery_alias) => { let mut replace_map = HashMap::new(); - for (i, field) in - subquery_alias.input.schema().fields().iter().enumerate() + for (i, (qualifier, field)) in + subquery_alias.input.schema().iter().enumerate() { + let (sub_qualifier, sub_field) = + subquery_alias.schema.qualified_field(i); replace_map.insert( - subquery_alias - .schema - .fields() - .get(i) - .unwrap() - .qualified_name(), - Expr::Column(field.qualified_column()), + qualified_name(sub_qualifier, sub_field.name()), + Expr::Column(Column::new(qualifier.cloned(), field.name())), ); } let new_predicate = @@ -700,17 +695,16 @@ impl OptimizerRule for PushDownFilter { let (volatile_map, non_volatile_map): (HashMap<_, _>, HashMap<_, _>) = projection .schema - .fields() .iter() .enumerate() - .map(|(i, field)| { + .map(|(i, (qualifier, field))| { // strip alias, as they should not be part of filters let expr = match &projection.expr[i] { Expr::Alias(Alias { expr, .. 
}) => expr.as_ref().clone(), expr => expr.clone(), }; - (field.qualified_name(), expr) + (qualified_name(qualifier, field.name()), expr) }) .partition(|(_, value)| { is_volatile_expression(value).unwrap_or(true) @@ -760,10 +754,12 @@ impl OptimizerRule for PushDownFilter { let mut inputs = Vec::with_capacity(union.inputs.len()); for input in &union.inputs { let mut replace_map = HashMap::new(); - for (i, field) in input.schema().fields().iter().enumerate() { + for (i, (qualifier, field)) in input.schema().iter().enumerate() { + let (union_qualifier, union_field) = + union.schema.qualified_field(i); replace_map.insert( - union.schema.fields().get(i).unwrap().qualified_name(), - Expr::Column(field.qualified_column()), + qualified_name(union_qualifier, union_field.name()), + Expr::Column(Column::new(qualifier.cloned(), field.name())), ); } diff --git a/datafusion/optimizer/src/push_down_projection.rs b/datafusion/optimizer/src/push_down_projection.rs index 28b3ff090fe6..ccdcf2f65bc8 100644 --- a/datafusion/optimizer/src/push_down_projection.rs +++ b/datafusion/optimizer/src/push_down_projection.rs @@ -29,7 +29,7 @@ mod tests { use crate::test::*; use crate::OptimizerContext; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::{Column, DFField, DFSchema, Result}; + use datafusion_common::{Column, DFSchema, Result}; use datafusion_expr::builder::table_scan_with_filters; use datafusion_expr::expr::{self, Cast}; use datafusion_expr::logical_plan::{ @@ -225,11 +225,20 @@ mod tests { **optimized_join.schema(), DFSchema::new_with_metadata( vec![ - DFField::new(Some("test"), "a", DataType::UInt32, false), - DFField::new(Some("test"), "b", DataType::UInt32, false), - DFField::new(Some("test2"), "c1", DataType::UInt32, true), + ( + Some("test".into()), + Arc::new(Field::new("a", DataType::UInt32, false)) + ), + ( + Some("test".into()), + Arc::new(Field::new("b", DataType::UInt32, false)) + ), + ( + Some("test2".into()), + Arc::new(Field::new("c1", DataType::UInt32, true)) + ), ], - HashMap::new(), + HashMap::new() )?, ); @@ -268,11 +277,20 @@ mod tests { **optimized_join.schema(), DFSchema::new_with_metadata( vec![ - DFField::new(Some("test"), "a", DataType::UInt32, false), - DFField::new(Some("test"), "b", DataType::UInt32, false), - DFField::new(Some("test2"), "c1", DataType::UInt32, true), + ( + Some("test".into()), + Arc::new(Field::new("a", DataType::UInt32, false)) + ), + ( + Some("test".into()), + Arc::new(Field::new("b", DataType::UInt32, false)) + ), + ( + Some("test2".into()), + Arc::new(Field::new("c1", DataType::UInt32, true)) + ), ], - HashMap::new(), + HashMap::new() )?, ); @@ -309,11 +327,20 @@ mod tests { **optimized_join.schema(), DFSchema::new_with_metadata( vec![ - DFField::new(Some("test"), "a", DataType::UInt32, false), - DFField::new(Some("test"), "b", DataType::UInt32, false), - DFField::new(Some("test2"), "a", DataType::UInt32, true), + ( + Some("test".into()), + Arc::new(Field::new("a", DataType::UInt32, false)) + ), + ( + Some("test".into()), + Arc::new(Field::new("b", DataType::UInt32, false)) + ), + ( + Some("test2".into()), + Arc::new(Field::new("a", DataType::UInt32, true)) + ), ], - HashMap::new(), + HashMap::new() )?, ); diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs index 0666c324d12c..0055e329c29d 100644 --- a/datafusion/optimizer/src/replace_distinct_aggregate.rs +++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs @@ -18,7 +18,7 @@ use 
crate::optimizer::{ApplyOrder, ApplyOrder::BottomUp}; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::Result; +use datafusion_common::{Column, Result}; use datafusion_expr::utils::expand_wildcard; use datafusion_expr::{ aggregate_function::AggregateFunction as AggregateFunctionFunc, col, @@ -122,15 +122,12 @@ impl OptimizerRule for ReplaceDistinctWithAggregate { // expressions, for `DISTINCT ON` we only need to emit the original selection expressions. let project_exprs = plan .schema() - .fields() .iter() .skip(on_expr.len()) - .zip(schema.fields().iter()) - .map(|(new_field, old_field)| { - Ok(col(new_field.qualified_column()).alias_qualified( - old_field.qualifier().cloned(), - old_field.name(), - )) + .zip(schema.iter()) + .map(|((new_qualifier, new_field), (old_qualifier, old_field))| { + Ok(col(Column::from((new_qualifier, new_field.as_ref()))) + .alias_qualified(old_qualifier.cloned(), old_field.name())) }) .collect::>>()?; diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 1cbe7decf15b..8b70f76617dd 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1729,7 +1729,7 @@ mod tests { use crate::test::test_table_scan_with_name; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::{assert_contains, DFField, ToDFSchema}; + use datafusion_common::{assert_contains, ToDFSchema}; use datafusion_expr::{interval_arithmetic::Interval, *}; use datafusion_physical_expr::execution_props::ExecutionProps; @@ -3085,17 +3085,18 @@ mod tests { fn expr_test_schema() -> DFSchemaRef { Arc::new( - DFSchema::new_with_metadata( + DFSchema::from_unqualifed_fields( vec![ - DFField::new_unqualified("c1", DataType::Utf8, true), - DFField::new_unqualified("c2", DataType::Boolean, true), - DFField::new_unqualified("c3", DataType::Int64, true), - DFField::new_unqualified("c4", DataType::UInt32, true), - DFField::new_unqualified("c1_non_null", DataType::Utf8, false), - DFField::new_unqualified("c2_non_null", DataType::Boolean, false), - DFField::new_unqualified("c3_non_null", DataType::Int64, false), - DFField::new_unqualified("c4_non_null", DataType::UInt32, false), - ], + Field::new("c1", DataType::Utf8, true), + Field::new("c2", DataType::Boolean, true), + Field::new("c3", DataType::Int64, true), + Field::new("c4", DataType::UInt32, true), + Field::new("c1_non_null", DataType::Utf8, false), + Field::new("c2_non_null", DataType::Boolean, false), + Field::new("c3_non_null", DataType::Int64, false), + Field::new("c4_non_null", DataType::UInt32, false), + ] + .into(), HashMap::new(), ) .unwrap(), diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 07a9d84f7d48..5b47abb308d0 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::{DFSchema, Result}; +use datafusion_common::{qualified_name, DFSchema, Result}; use datafusion_expr::expr::AggregateFunctionDefinition; use datafusion_expr::{ aggregate_function::AggregateFunction::{Max, Min, Sum}, @@ -118,7 +118,6 @@ impl OptimizerRule for SingleDistinctToGroupBy { .. }) => { if is_single_distinct_agg(plan)? 
&& !contains_grouping_set(group_expr) { - let fields = schema.fields(); // alias all original group_by exprs let (mut inner_group_exprs, out_group_expr_with_alias): ( Vec, @@ -150,9 +149,13 @@ impl OptimizerRule for SingleDistinctToGroupBy { // Second aggregate refers to the `test.a + Int32(1)` expression However, its input do not have `test.a` expression in it. let alias_str = format!("group_alias_{i}"); let alias_expr = group_expr.clone().alias(&alias_str); + let (qualifier, field) = schema.qualified_field(i); ( alias_expr, - (col(alias_str), Some(fields[i].qualified_name())), + ( + col(alias_str), + Some(qualified_name(qualifier, field.name())), + ), ) } }) @@ -266,7 +269,8 @@ impl OptimizerRule for SingleDistinctToGroupBy { }) .chain(outer_aggr_exprs.iter().enumerate().map(|(idx, expr)| { let idx = idx + group_size; - let name = fields[idx].qualified_name(); + let (qualifier, field) = schema.qualified_field(idx); + let name = qualified_name(qualifier, field.name()); columnize_expr(expr.clone().alias(name), &outer_aggr_schema) })) .collect(); diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 196a35ee9ae8..e4a777e7c71a 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -484,7 +484,7 @@ mod tests { use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::{DataType, Field}; use datafusion_common::tree_node::{TransformedResult, TreeNode}; - use datafusion_common::{DFField, DFSchema, DFSchemaRef, ScalarValue}; + use datafusion_common::{DFSchema, DFSchemaRef, ScalarValue}; use datafusion_expr::{cast, col, in_list, lit, try_cast, Expr}; #[test] @@ -740,25 +740,18 @@ mod tests { fn expr_test_schema() -> DFSchemaRef { Arc::new( - DFSchema::new_with_metadata( + DFSchema::from_unqualifed_fields( vec![ - DFField::new_unqualified("c1", DataType::Int32, false), - DFField::new_unqualified("c2", DataType::Int64, false), - DFField::new_unqualified("c3", DataType::Decimal128(18, 2), false), - DFField::new_unqualified("c4", DataType::Decimal128(38, 37), false), - DFField::new_unqualified("c5", DataType::Float32, false), - DFField::new_unqualified("c6", DataType::UInt32, false), - DFField::new_unqualified( - "ts_nano_none", - timestamp_nano_none_type(), - false, - ), - DFField::new_unqualified( - "ts_nano_utf", - timestamp_nano_utc_type(), - false, - ), - ], + Field::new("c1", DataType::Int32, false), + Field::new("c2", DataType::Int64, false), + Field::new("c3", DataType::Decimal128(18, 2), false), + Field::new("c4", DataType::Decimal128(38, 37), false), + Field::new("c5", DataType::Float32, false), + Field::new("c6", DataType::UInt32, false), + Field::new("ts_nano_none", timestamp_nano_none_type(), false), + Field::new("ts_nano_utf", timestamp_nano_utc_type(), false), + ] + .into(), HashMap::new(), ) .unwrap(), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index f9e2dc5596ac..b39ce41bbe26 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -30,8 +30,8 @@ use arrow::{ use datafusion::execution::registry::FunctionRegistry; use datafusion_common::{ arrow_datafusion_err, internal_err, plan_datafusion_err, Column, Constraint, - Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, - Result, ScalarValue, + Constraints, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, 
Result, + ScalarValue, }; use datafusion_expr::expr::Unnest; use datafusion_expr::expr::{Alias, Placeholder}; @@ -170,13 +170,24 @@ impl TryFrom<&protobuf::DfSchema> for DFSchema { type Error = Error; fn try_from(df_schema: &protobuf::DfSchema) -> Result { - let fields = df_schema - .columns - .iter() - .map(|c| c.try_into()) - .collect::, _>>()?; + let df_fields = df_schema.columns.clone(); + let qualifiers_and_fields: Vec<(Option, Arc)> = + df_fields + .iter() + .map(|df_field| { + let field: Field = df_field.field.as_ref().required("field")?; + Ok(( + df_field + .qualifier + .as_ref() + .map(|q| q.relation.clone().into()), + Arc::new(field), + )) + }) + .collect::, Error>>()?; + Ok(DFSchema::new_with_metadata( - fields, + qualifiers_and_fields, df_schema.metadata.clone(), )?) } @@ -191,19 +202,6 @@ impl TryFrom for DFSchemaRef { } } -impl TryFrom<&protobuf::DfField> for DFField { - type Error = Error; - - fn try_from(df_field: &protobuf::DfField) -> Result { - let field: Field = df_field.field.as_ref().required("field")?; - - Ok(match &df_field.qualifier { - Some(q) => DFField::from_qualified(q.relation.clone(), field), - None => DFField::from(field), - }) - } -} - impl From for WindowFrameUnits { fn from(units: protobuf::WindowFrameUnits) -> Self { match units { diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 3ee69066e1aa..39f8a913db94 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -45,7 +45,7 @@ use arrow::{ record_batch::RecordBatch, }; use datafusion_common::{ - Column, Constraint, Constraints, DFField, DFSchema, DFSchemaRef, OwnedTableReference, + Column, Constraint, Constraints, DFSchema, DFSchemaRef, OwnedTableReference, ScalarValue, }; use datafusion_expr::expr::{ @@ -275,27 +275,20 @@ impl TryFrom for protobuf::Schema { } } -impl TryFrom<&DFField> for protobuf::DfField { - type Error = Error; - - fn try_from(f: &DFField) -> Result { - Ok(Self { - field: Some(f.field().as_ref().try_into()?), - qualifier: f.qualifier().map(|r| protobuf::ColumnRelation { - relation: r.to_string(), - }), - }) - } -} - impl TryFrom<&DFSchema> for protobuf::DfSchema { type Error = Error; fn try_from(s: &DFSchema) -> Result { let columns = s - .fields() .iter() - .map(|f| f.try_into()) + .map(|(qualifier, field)| { + Ok(protobuf::DfField { + field: Some(field.as_ref().try_into()?), + qualifier: qualifier.map(|r| protobuf::ColumnRelation { + relation: r.to_string(), + }), + }) + }) .collect::, Error>>()?; Ok(Self { columns, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 3a47f556c0f3..22543c0dd1bf 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; use std::sync::Arc; +use std::vec; use arrow::array::{ArrayRef, FixedSizeListArray}; use arrow::datatypes::{ @@ -34,8 +35,8 @@ use datafusion::test_util::{TestTableFactory, TestTableProvider}; use datafusion_common::config::{FormatOptions, TableOptions}; use datafusion_common::scalar::ScalarStructBuilder; use datafusion_common::{ - internal_datafusion_err, internal_err, not_impl_err, plan_err, DFField, DFSchema, - DFSchemaRef, DataFusionError, FileType, Result, ScalarValue, + internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, DFSchemaRef, + 
DataFusionError, FileType, Result, ScalarValue, }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ @@ -1412,9 +1413,15 @@ fn roundtrip_schema() { fn roundtrip_dfschema() { let dfschema = DFSchema::new_with_metadata( vec![ - DFField::new_unqualified("a", DataType::Int64, false), - DFField::new(Some("t"), "b", DataType::Decimal128(15, 2), true) - .with_metadata(HashMap::from([(String::from("k1"), String::from("v1"))])), + (None, Arc::new(Field::new("a", DataType::Int64, false))), + ( + Some("t".into()), + Arc::new( + Field::new("b", DataType::Decimal128(15, 2), true).with_metadata( + HashMap::from([(String::from("k1"), String::from("v1"))]), + ), + ), + ), ], HashMap::from([ (String::from("k2"), String::from("v2")), diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 9f53ff579e7c..beb7a133e0eb 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -16,9 +16,10 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use arrow_schema::Field; use datafusion_common::{ - internal_err, plan_datafusion_err, Column, DFField, DFSchema, DataFusionError, - Result, TableReference, + internal_err, plan_datafusion_err, Column, DFSchema, DataFusionError, + OwnedTableReference, Result, TableReference, }; use datafusion_expr::{Case, Expr}; use sqlparser::ast::{Expr as SQLExpr, Ident}; @@ -57,13 +58,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Err(_) => { // check the outer_query_schema and try to find a match if let Some(outer) = planner_context.outer_query_schema() { - match outer.field_with_unqualified_name(normalize_ident.as_str()) - { - Ok(field) => { + match outer.qualified_field_with_unqualified_name( + normalize_ident.as_str(), + ) { + Ok((qualifier, field)) => { // found an exact match on a qualified name in the outer plan schema, so this is an outer reference column Ok(Expr::OuterReferenceColumn( field.data_type().clone(), - field.qualified_column(), + Column::from((qualifier, field)), )) } Err(_) => Ok(Expr::Column(Column { @@ -122,20 +124,20 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let search_result = search_dfschema(&ids, schema); match search_result { // found matching field with spare identifier(s) for nested field(s) in structure - Some((field, nested_names)) if !nested_names.is_empty() => { + Some((field, qualifier, nested_names)) if !nested_names.is_empty() => { // TODO: remove when can support multiple nested identifiers if nested_names.len() > 1 { return internal_err!( "Nested identifiers not yet supported for column {}", - field.qualified_column().quoted_flat_name() + Column::from((qualifier, field)).quoted_flat_name() ); } let nested_name = nested_names[0].to_string(); - Ok(Expr::Column(field.qualified_column()).field(nested_name)) + Ok(Expr::Column(Column::from((qualifier, field))).field(nested_name)) } // found matching field with no spare identifier(s) - Some((field, _nested_names)) => { - Ok(Expr::Column(field.qualified_column())) + Some((field, qualifier, _nested_names)) => { + Ok(Expr::Column(Column::from((qualifier, field)))) } None => { // return default where use all identifiers to not have a nested field @@ -148,21 +150,21 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let search_result = search_dfschema(&ids, outer); match search_result { // found matching field with spare identifier(s) for nested field(s) in structure - Some((field, nested_names)) + Some((field, qualifier, nested_names)) if !nested_names.is_empty() => { 
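// Illustration of the `nested_names` leftover this arm guards against: for
// `ids = ["t", "s", "city"]` with a struct column `s` qualified by `t`,
// `search_dfschema` resolves field `s` under qualifier `t` and returns
// `["city"]` as `nested_names`; the non-outer branch above turns a single
// leftover into a `.field("city")` access, while outer references still
// reject any leftovers.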
// TODO: remove when can support nested identifiers for OuterReferenceColumn internal_err!( "Nested identifiers are not yet supported for OuterReferenceColumn {}", - field.qualified_column().quoted_flat_name() + Column::from((qualifier, field)).quoted_flat_name() ) } // found matching field with no spare identifier(s) - Some((field, _nested_names)) => { + Some((field, qualifier, _nested_names)) => { // found an exact match on a qualified name in the outer plan schema, so this is an outer reference column Ok(Expr::OuterReferenceColumn( field.data_type().clone(), - field.qualified_column(), + Column::from((qualifier, field)), )) } // found no matching field, will return a default @@ -269,10 +271,16 @@ fn form_identifier(idents: &[String]) -> Result<(Option, &String fn search_dfschema<'ids, 'schema>( ids: &'ids [String], schema: &'schema DFSchema, -) -> Option<(&'schema DFField, &'ids [String])> { +) -> Option<( + &'schema Field, + Option<&'schema OwnedTableReference>, + &'ids [String], +)> { generate_schema_search_terms(ids).find_map(|(qualifier, column, nested_names)| { - let field = schema.field_with_name(qualifier.as_ref(), column).ok(); - field.map(|f| (f, nested_names)) + let qualifier_and_field = schema + .qualified_field_with_name(qualifier.as_ref(), column) + .ok(); + qualifier_and_field.map(|(qualifier, field)| (field, qualifier, nested_names)) }) } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 064578ad51d6..4173e129f428 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -136,20 +136,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match expr { Expr::Column(col) => match &col.relation { Some(q) => { - match schema - .fields() - .iter() - .find(|field| match field.qualifier() { - Some(field_q) => { - field.name() == &col.name - && field_q.to_string().ends_with(&format!(".{q}")) - } - _ => false, - }) { - Some(df_field) => Expr::Column(Column { - relation: df_field.qualifier().cloned(), - name: df_field.name().clone(), - }), + match schema.iter().find(|(qualifier, field)| match qualifier { + Some(field_q) => { + field.name() == &col.name + && field_q.to_string().ends_with(&format!(".{q}")) + } + _ => false, + }) { + Some((qualifier, df_field)) => { + Expr::Column(Column::from((qualifier, df_field.as_ref()))) + } None => Expr::Column(col), } } diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index 46f19f436ccc..4ccdf6c2d418 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -16,7 +16,7 @@ // under the License. 
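// Context for the hunk below: a numeric literal in ORDER BY (e.g. `ORDER BY 2`)
// is a 1-based ordinal into the select list, which the planner maps directly to
// a schema column. With DFField gone, the lookup in sketch form:
//
//     let field_index = 2; // from `ORDER BY 2`
//     let expr = Expr::Column(Column::from(schema.qualified_field(field_index - 1)));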
use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{plan_datafusion_err, plan_err, DFSchema, Result}; +use datafusion_common::{plan_datafusion_err, plan_err, Column, DFSchema, Result}; use datafusion_expr::expr::Sort; use datafusion_expr::Expr; use sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value}; @@ -60,8 +60,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ); } - let field = schema.field(field_index - 1); - Expr::Column(field.qualified_column()) + Expr::Column(Column::from(schema.qualified_field(field_index - 1))) } e => self.sql_expr_to_logical_expr(e.clone(), schema, planner_context)?, }; diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index 4ba089f48630..262bae397cee 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -145,17 +145,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .build() } JoinConstraint::Natural => { - let left_cols: HashSet<&String> = left - .schema() - .fields() - .iter() - .map(|f| f.field().name()) - .collect(); + let left_cols: HashSet<&String> = + left.schema().fields().iter().map(|f| f.name()).collect(); let keys: Vec = right .schema() .fields() .iter() - .map(|f| f.field().name()) + .map(|f| f.name()) .filter(|f| left_cols.contains(f)) .map(Column::from_name) .collect(); diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 7717f75d16b8..69d1b71e4fe8 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -29,11 +29,11 @@ use crate::planner::{ }; use crate::utils::normalize_ident; -use arrow_schema::DataType; +use arrow_schema::{DataType, Fields}; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ exec_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, - unqualified_field_not_found, Column, Constraints, DFField, DFSchema, DFSchemaRef, + unqualified_field_not_found, Column, Constraints, DFSchema, DFSchemaRef, DataFusionError, FileType, OwnedTableReference, Result, ScalarValue, SchemaError, SchemaReference, TableReference, ToDFSchema, }; @@ -988,6 +988,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let schema = self.build_schema(columns)?; let df_schema = schema.to_dfschema_ref()?; + df_schema.check_names()?; let ordered_exprs = self.build_order_by(order_exprs, &df_schema, &mut planner_context)?; @@ -1263,9 +1264,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Build updated values for each column, using the previous value if not modified let exprs = table_schema - .fields() .iter() - .map(|field| { + .map(|(qualifier, field)| { let expr = match assign_map.remove(field.name()) { Some(new_value) => { let mut expr = self.sql_to_expr( @@ -1292,7 +1292,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { field.name(), )) } else { - datafusion_expr::Expr::Column(field.qualified_column()) + datafusion_expr::Expr::Column(Column::from(( + qualifier, + field.as_ref(), + ))) } } }; @@ -1358,8 +1361,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } Ok(table_schema.field(column_index).clone()) }) - .collect::>>()?; - (fields, value_indices) + .collect::>>()?; + (Fields::from(fields), value_indices) }; // infer types for Values clause... 
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 7717f75d16b8..69d1b71e4fe8 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -29,11 +29,11 @@ use crate::planner::{
 };
 use crate::utils::normalize_ident;
 
-use arrow_schema::DataType;
+use arrow_schema::{DataType, Fields};
 use datafusion_common::parsers::CompressionTypeVariant;
 use datafusion_common::{
     exec_err, not_impl_err, plan_datafusion_err, plan_err, schema_err,
-    unqualified_field_not_found, Column, Constraints, DFField, DFSchema, DFSchemaRef,
+    unqualified_field_not_found, Column, Constraints, DFSchema, DFSchemaRef,
     DataFusionError, FileType, OwnedTableReference, Result, ScalarValue, SchemaError,
     SchemaReference, TableReference, ToDFSchema,
 };
@@ -988,6 +988,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
         let schema = self.build_schema(columns)?;
         let df_schema = schema.to_dfschema_ref()?;
+        df_schema.check_names()?;
 
         let ordered_exprs =
             self.build_order_by(order_exprs, &df_schema, &mut planner_context)?;
@@ -1263,9 +1264,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
         // Build updated values for each column, using the previous value if not modified
         let exprs = table_schema
-            .fields()
             .iter()
-            .map(|field| {
+            .map(|(qualifier, field)| {
                 let expr = match assign_map.remove(field.name()) {
                     Some(new_value) => {
                         let mut expr = self.sql_to_expr(
@@ -1292,7 +1292,10 @@
                             field.name(),
                         ))
                     } else {
-                        datafusion_expr::Expr::Column(field.qualified_column())
+                        datafusion_expr::Expr::Column(Column::from((
+                            qualifier,
+                            field.as_ref(),
+                        )))
                     }
                 }
             };
@@ -1358,8 +1361,8 @@
                 }
                 Ok(table_schema.field(column_index).clone())
             })
-            .collect::<Result<Vec<DFField>>>()?;
-        (fields, value_indices)
+            .collect::<Result<Vec<Field>>>()?;
+        (Fields::from(fields), value_indices)
     };
 
     // infer types for Values clause... other types should be resolvable the regular way
@@ -1378,7 +1381,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                     idx + 1
                 )
             })?;
-            let dt = field.field().data_type().clone();
+            let dt = field.data_type().clone();
             let _ = prepare_param_data_types.insert(name, dt);
         }
     }
@@ -1400,11 +1403,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             .map(|(i, value_index)| {
                 let target_field = table_schema.field(i);
                 let expr = match value_index {
-                    Some(v) => {
-                        let source_field = source.schema().field(v);
-                        datafusion_expr::Expr::Column(source_field.qualified_column())
-                            .cast_to(target_field.data_type(), source.schema())?
-                    }
+                    Some(v) => datafusion_expr::Expr::Column(Column::from(
+                        source.schema().qualified_field(v),
+                    ))
+                    .cast_to(target_field.data_type(), source.schema())?,
                     // The value is not specified. Fill in the default value for the column.
                     None => table_source
                         .get_column_default(target_field.name())
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index abb896ab113e..d2f1982d5418 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -24,7 +24,7 @@ use arrow_schema::{
 };
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_common::{
-    exec_err, internal_err, plan_err, DataFusionError, Result, ScalarValue,
+    exec_err, internal_err, plan_err, Column, DataFusionError, Result, ScalarValue,
 };
 use datafusion_expr::expr::{Alias, GroupingSet, WindowFunction};
 use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs};
@@ -37,8 +37,11 @@ pub(crate) fn resolve_columns(expr: &Expr, plan: &LogicalPlan) -> Result<Expr> {
         .transform_up(&|nested_expr| {
             match nested_expr {
                 Expr::Column(col) => {
-                    let field = plan.schema().field_from_column(&col)?;
-                    Ok(Transformed::yes(Expr::Column(field.qualified_column())))
+                    let (qualifier, field) =
+                        plan.schema().qualified_field_from_column(&col)?;
+                    Ok(Transformed::yes(Expr::Column(Column::from((
+                        qualifier, field,
+                    )))))
                 }
                 _ => {
                     // keep recursing
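resolve_columns applies the same qualifier threading through the TreeNode rewriter. A condensed, self-contained sketch of the rewrite, assuming the transform_up / Transformed / TransformedResult API imported in the hunk above (the helper name is illustrative):

    use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
    use datafusion_common::{Column, Result};
    use datafusion_expr::{Expr, LogicalPlan};

    /// Rewrite every column reference in `expr` into its fully qualified
    /// form, recovering the qualifier from the plan's output schema.
    fn qualify_columns(expr: Expr, plan: &LogicalPlan) -> Result<Expr> {
        expr.transform_up(&|e| match e {
            Expr::Column(col) => {
                let (qualifier, field) =
                    plan.schema().qualified_field_from_column(&col)?;
                Ok(Transformed::yes(Expr::Column(Column::from((
                    qualifier, field,
                )))))
            }
            // Non-column expressions are left alone; transform_up has
            // already visited their children (bottom-up traversal).
            other => Ok(Transformed::no(other)),
        })
        .data()
    }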
diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
index c0db111bc60d..04e80b77bb9f 100644
--- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
+++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use arrow::datatypes::Fields;
 use arrow::util::display::ArrayFormatter;
 use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch};
 use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
-use datafusion_common::DFField;
 use datafusion_common::DataFusionError;
 use std::path::PathBuf;
 use std::sync::OnceLock;
@@ -239,7 +239,7 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result<String> {
 }
 
 /// Converts columns to a result as expected by sqllogictest.
-pub(crate) fn convert_schema_to_types(columns: &[DFField]) -> Vec<DFColumnType> {
+pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec<DFColumnType> {
     columns
         .iter()
         .map(|f| f.data_type())
diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs
index ed1e48ca71a6..54324658a1ad 100644
--- a/datafusion/substrait/src/logical_plan/consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer.rs
@@ -18,7 +18,7 @@
 use async_recursion::async_recursion;
 use datafusion::arrow::datatypes::{DataType, Field, TimeUnit};
 use datafusion::common::{
-    not_impl_err, substrait_datafusion_err, substrait_err, DFField, DFSchema, DFSchemaRef,
+    not_impl_err, substrait_datafusion_err, substrait_err, DFSchema, DFSchemaRef,
 };
 use datafusion::execution::FunctionRegistry;
@@ -484,9 +484,14 @@ pub async fn from_substrait_rel(
             .collect();
         match &t {
             LogicalPlan::TableScan(scan) => {
-                let fields: Vec<DFField> = column_indices
+                let fields = column_indices
                     .iter()
-                    .map(|i| scan.projected_schema.field(*i).clone())
+                    .map(|i| {
+                        scan.projected_schema.qualified_field(*i)
+                    })
+                    .map(|(qualifier, field)| {
+                        (qualifier.cloned(), Arc::new(field.clone()))
+                    })
                     .collect();
                 let mut scan = scan.clone();
                 scan.projection = Some(column_indices);
@@ -1389,13 +1394,9 @@ fn from_substrait_field_reference(
         Some(_) => not_impl_err!(
             "Direct reference StructField with child is not supported"
         ),
-        None => {
-            let column = input_schema.field(x.field as usize).qualified_column();
-            Ok(Expr::Column(Column {
-                relation: column.relation,
-                name: column.name,
-            }))
-        }
+        None => Ok(Expr::Column(Column::from(
+            input_schema.qualified_field(x.field as usize),
+        ))),
     },
     _ => not_impl_err!(
         "Direct reference with types other than StructField is not supported"

From 544e49bb0acac7130a873a92b44e1c902e41ac8f Mon Sep 17 00:00:00 2001
From: Eren Avsarogullari
Date: Tue, 2 Apr 2024 06:20:29 -0700
Subject: [PATCH 08/12] Add `spilled_rows` metric to `ExternalSorter` by `IPCWriter` (#9885)

* Expose ExternalSorter spilled_rows metric

* Issue-9884 - Address review comments
---
 .../physical-plan/src/metrics/builder.rs      |  9 +++++
 datafusion/physical-plan/src/metrics/mod.rs   | 10 ++++-
 datafusion/physical-plan/src/metrics/value.rs | 24 ++++++++----
 datafusion/physical-plan/src/sorts/sort.rs    | 38 +++++++++++++------
 4 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs
index 5e8ff72df35c..2037ddb70c2d 100644
--- a/datafusion/physical-plan/src/metrics/builder.rs
+++ b/datafusion/physical-plan/src/metrics/builder.rs
@@ -123,6 +123,15 @@ impl<'a> MetricBuilder<'a> {
         count
     }
 
+    /// Consume self and create a new counter for recording the total spilled rows
+    /// triggered by an operator
+    pub fn spilled_rows(self, partition: usize) -> Count {
+        let count = Count::new();
+        self.with_partition(partition)
+            .build(MetricValue::SpilledRows(count.clone()));
+        count
+    }
+
     /// Consume self and create a new gauge for reporting current memory usage
     pub fn mem_used(self, partition: usize) -> Gauge {
         let gauge = Gauge::new();
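The new builder method mirrors the existing counter builders, so an operator wires it up the same way. A sketch of the intended usage, assuming the usual ExecutionPlanMetricsSet plumbing (in a real operator the Count would be created once, e.g. in the constructor, and stored rather than rebuilt per spill):

    use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder};

    /// Register the per-partition counter and bump it after a spill.
    fn record_spill(metrics: &ExecutionPlanMetricsSet, partition: usize, rows: usize) {
        let spilled_rows = MetricBuilder::new(metrics).spilled_rows(partition);
        spilled_rows.add(rows);
    }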
diff --git a/datafusion/physical-plan/src/metrics/mod.rs b/datafusion/physical-plan/src/metrics/mod.rs
index b2e0086f69e9..9232865aa09c 100644
--- a/datafusion/physical-plan/src/metrics/mod.rs
+++ b/datafusion/physical-plan/src/metrics/mod.rs
@@ -70,7 +70,7 @@ pub struct Metric {
     /// The value of the metric
     value: MetricValue,
 
-    /// arbitrary name=value pairs identifiying this metric
+    /// arbitrary name=value pairs identifying this metric
     labels: Vec<Label>,