From d1514632a1601934e6f49da699c09c82ce375941 Mon Sep 17 00:00:00 2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Fri, 22 Mar 2024 18:17:02 +0000 Subject: [PATCH 1/5] Support custom struct field names with new scalar function named_struct --- datafusion/functions/src/core/mod.rs | 3 + datafusion/functions/src/core/named_struct.rs | 186 ++++++++++++++++++ datafusion/sql/src/expr/mod.rs | 45 ++++- .../sqllogictest/test_files/explain.slt | 4 +- datafusion/sqllogictest/test_files/struct.slt | 10 +- .../source/user-guide/sql/scalar_functions.md | 58 +++++- 6 files changed, 283 insertions(+), 23 deletions(-) create mode 100644 datafusion/functions/src/core/named_struct.rs diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 5a0bd2c77f63..85d2410251c5 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -20,6 +20,7 @@ mod arrow_cast; mod arrowtypeof; mod getfield; +mod named_struct; mod nullif; mod nvl; mod nvl2; @@ -32,6 +33,7 @@ make_udf_function!(nvl::NVLFunc, NVL, nvl); make_udf_function!(nvl2::NVL2Func, NVL2, nvl2); make_udf_function!(arrowtypeof::ArrowTypeOfFunc, ARROWTYPEOF, arrow_typeof); make_udf_function!(r#struct::StructFunc, STRUCT, r#struct); +make_udf_function!(named_struct::NamedStructFunc, NAMED_STRUCT, named_struct); make_udf_function!(getfield::GetFieldFunc, GET_FIELD, get_field); // Export the functions out of this package, both as expr_fn as well as a list of functions @@ -42,5 +44,6 @@ export_functions!( (nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3."), (arrow_typeof, arg_1, "Returns the Arrow type of the input expression."), (r#struct, args, "Returns a struct with the given arguments"), + (named_struct, args, "Returns a struct with the given names and arguments pairs"), (get_field, arg_1 arg_2, "Returns the value of the field with the given name from the struct") ); diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs new file mode 100644 index 000000000000..b6474a9b1386 --- /dev/null +++ b/datafusion/functions/src/core/named_struct.rs @@ -0,0 +1,186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::StructArray; +use arrow::datatypes::{DataType, Field, Fields}; +use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; + +/// put values in a struct array. +fn named_struct_expr(args: &[ColumnarValue]) -> Result { + // do not accept 0 arguments. + if args.is_empty() { + return exec_err!("named_struct requires at least one pair of arguments"); + } + + if args.len() % 2 != 0 { + return exec_err!("named_struct requires an even number of arguments"); + } + + let fields = args + .chunks_exact(2) + .map(|chunk| { + let name = &chunk[0]; + let value = &chunk[1]; + + if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(name))) = name { + let array_ref = match value { + ColumnarValue::Array(array) => array.clone(), + ColumnarValue::Scalar(scalar) => scalar.to_array()?.clone(), + }; + + Ok(( + Arc::new(Field::new( + name.clone(), + array_ref.data_type().clone(), + true, + )), + array_ref, + )) + } else { + exec_err!("named_struct even arguments must be string literals") + } + }) + .collect::>>()?; + Ok(ColumnarValue::Array(Arc::new(StructArray::from(fields)))) +} +#[derive(Debug)] +pub(super) struct NamedStructFunc { + signature: Signature, +} + +impl NamedStructFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for NamedStructFunc { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "named_struct" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Err(datafusion_common::DataFusionError::Internal( + "return_type called instead of return_type_from_exprs".into(), + )) + } + + fn return_type_from_exprs( + &self, + args: &[datafusion_expr::Expr], + schema: &dyn datafusion_common::ExprSchema, + _arg_types: &[DataType], + ) -> Result { + // do not accept 0 arguments. + if args.is_empty() { + return exec_err!("named_struct requires at least one pair of arguments"); + } + + if args.len() % 2 != 0 { + return exec_err!("named_struct requires an even number of arguments"); + } + + let return_fields = args + .chunks_exact(2) + .map(|chunk| { + let name = &chunk[0]; + let value = &chunk[1]; + + if let Expr::Literal(ScalarValue::Utf8(Some(name))) = name { + Ok(Field::new(name.clone(), value.get_type(schema)?, true)) + } else { + exec_err!("named_struct even arguments must be string literals") + } + }) + .collect::>>()?; + Ok(DataType::Struct(Fields::from(return_fields))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + named_struct_expr(args) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::Int64Array; + use datafusion_common::cast::as_struct_array; + use datafusion_common::ScalarValue; + + #[test] + fn test_named_struct() { + // named_struct("first", 1, "second", 2, "third", 3) = {"first": 1, "second": 2, "third": 3} + let args = [ + ColumnarValue::Scalar(ScalarValue::Utf8(Some("first".into()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("second".into()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("third".into()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ]; + let struc = named_struct_expr(&args) + .expect("failed to initialize function struct") + .into_array(1) + .expect("Failed to convert to array"); + let result = + as_struct_array(&struc).expect("failed to initialize function struct"); + assert_eq!( + &Int64Array::from(vec![1]), + result + .column_by_name("first") + .unwrap() + .clone() + .as_any() + .downcast_ref::() + .unwrap() + ); + assert_eq!( + &Int64Array::from(vec![2]), + result + .column_by_name("second") + .unwrap() + .clone() + .as_any() + .downcast_ref::() + .unwrap() + ); + assert_eq!( + &Int64Array::from(vec![3]), + result + .column_by_name("third") + .unwrap() + .clone() + .as_any() + .downcast_ref::() + .unwrap() + ); + } +} diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index c34b42193cec..6e8b12ca3a92 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -38,7 +38,8 @@ use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ col, expr, lit, AggregateFunction, Between, BinaryExpr, BuiltinScalarFunction, Cast, - Expr, ExprSchemable, GetFieldAccess, GetIndexedField, Like, Operator, TryCast, + Expr, ExprSchemable, GetFieldAccess, GetIndexedField, Like, Literal, Operator, + TryCast, }; use sqlparser::ast::{ArrayAgg, Expr as SQLExpr, JsonOperator, TrimWhereField, Value}; use sqlparser::parser::ParserError::ParserError; @@ -602,18 +603,44 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } let args = values .into_iter() - .map(|value| { - self.sql_expr_to_logical_expr(value, input_schema, planner_context) + .enumerate() + .map(|(i, value)| { + let args = if let SQLExpr::Named { expr, name } = value { + [ + name.value.lit(), + self.sql_expr_to_logical_expr( + *expr, + input_schema, + planner_context, + )?, + ] + } else { + [ + format!("c{i}").lit(), + self.sql_expr_to_logical_expr( + value, + input_schema, + planner_context, + )?, + ] + }; + + Ok(args) }) - .collect::>>()?; - let struct_func = self + .collect::>>()? + .into_iter() + .flatten() + .collect(); + + let named_struct_func = self .context_provider - .get_function_meta("struct") + .get_function_meta("named_struct") .ok_or_else(|| { - internal_datafusion_err!("Unable to find expected 'struct' function") - })?; + internal_datafusion_err!("Unable to find expected 'named_struct' function") + })?; + Ok(Expr::ScalarFunction(ScalarFunction::new_udf( - struct_func, + named_struct_func, args, ))) } diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index b7ad36dace16..4653250cf93f 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -390,8 +390,8 @@ query TT explain select struct(1, 2.3, 'abc'); ---- logical_plan -Projection: Struct({c0:1,c1:2.3,c2:abc}) AS struct(Int64(1),Float64(2.3),Utf8("abc")) +Projection: Struct({c0:1,c1:2.3,c2:abc}) AS named_struct(Utf8("c0"),Int64(1),Utf8("c1"),Float64(2.3),Utf8("c2"),Utf8("abc")) --EmptyRelation physical_plan -ProjectionExec: expr=[{c0:1,c1:2.3,c2:abc} as struct(Int64(1),Float64(2.3),Utf8("abc"))] +ProjectionExec: expr=[{c0:1,c1:2.3,c2:abc} as named_struct(Utf8("c0"),Int64(1),Utf8("c1"),Float64(2.3),Utf8("c2"),Utf8("abc"))] --PlaceholderRowExec diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 936dedcc896e..1ab6aa01c411 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -50,6 +50,12 @@ select struct(1, 3.14, 'e'); ---- {c0: 1, c1: 3.14, c2: e} +# struct scalar function #1 with alias +query ? +select struct(1 as "name0", 3.14 as name1, 'e', true as 'name3'); +---- +{name0: 1, name1: 3.14, c2: e, name3: true} + # struct scalar function with columns #1 query ? select struct(a, b, c) from values; @@ -63,10 +69,10 @@ query TT explain select struct(a, b, c) from values; ---- logical_plan -Projection: struct(values.a, values.b, values.c) +Projection: named_struct(Utf8("c0"), values.a, Utf8("c1"), values.b, Utf8("c2"), values.c) --TableScan: values projection=[a, b, c] physical_plan -ProjectionExec: expr=[struct(a@0, b@1, c@2) as struct(values.a,values.b,values.c)] +ProjectionExec: expr=[named_struct(c0, a@0, c1, b@1, c2, c@2) as named_struct(Utf8("c0"),values.a,Utf8("c1"),values.b,Utf8("c2"),values.c)] --MemoryExec: partitions=1, partition_sizes=[1] statement ok diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index d4570dbc35f2..a752cc070016 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3306,11 +3306,12 @@ are not allowed ## Struct Functions - [struct](#struct) +- [named_struct](#named_struct) ### `struct` -Returns an Arrow struct using the specified input expressions. -Fields in the returned struct use the `cN` naming convention. +Returns an Arrow struct using the specified input expressions optionally named. +Fields in the returned struct use the optional name or the `cN` naming convention. For example: `c0`, `c1`, `c2`, etc. ``` @@ -3318,7 +3319,7 @@ struct(expression1[, ..., expression_n]) ``` For example, this query converts two columns `a` and `b` to a single column with -a struct type of fields `c0` and `c1`: +a struct type of fields `field_a` and `c1`: ``` select * from t; @@ -3329,18 +3330,55 @@ select * from t; | 3 | 4 | +---+---+ -select struct(a, b) from t; -+-----------------+ -| struct(t.a,t.b) | -+-----------------+ -| {c0: 1, c1: 2} | -| {c0: 3, c1: 4} | -+-----------------+ +select struct(a as field_a, b) from t; ++--------------------------------------------------+ +| named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) | ++--------------------------------------------------+ +| {field_a: 1, c1: 2} | +| {field_a: 3, c1: 4} | ++--------------------------------------------------+ ``` #### Arguments - **expression_n**: Expression to include in the output struct. + Can be a constant, column, or function, any combination of arithmetic or + string operators, or a named expression of previous listed . + +### `named_struct` + +Returns an Arrow struct using the specified name and input expressions pairs. + +``` +named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input]) +``` + +For example, this query converts two columns `a` and `b` to a single column with +a struct type of fields `field_a` and `field_b`: + +``` +select * from t; ++---+---+ +| a | b | ++---+---+ +| 1 | 2 | +| 3 | 4 | ++---+---+ + +select named_struct('field_a', a, 'field_b', b) from t; ++-------------------------------------------------------+ +| named_struct(Utf8("field_a"),t.a,Utf8("field_b"),t.b) | ++-------------------------------------------------------+ +| {field_a: 1, field_b: 2} | +| {field_a: 3, field_b: 4} | ++-------------------------------------------------------+ +``` + +#### Arguments + +- **expression_n_name**: Name of the column field. + Must be a constant string. +- **expression_n_input**: Expression to include in the output struct. Can be a constant, column, or function, and any combination of arithmetic or string operators. From 774ce116fc5cc36933554af593ead59db3b3ce7d Mon Sep 17 00:00:00 2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Thu, 28 Mar 2024 01:10:45 +0000 Subject: [PATCH 2/5] add tests and corretly handle mixed arrray and scalar values --- datafusion/functions/src/core/named_struct.rs | 78 +++++++------ datafusion/sqllogictest/test_files/struct.slt | 104 +++++++++++++++++- 2 files changed, 143 insertions(+), 39 deletions(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index b6474a9b1386..28da91e3b159 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -17,7 +17,7 @@ use arrow::array::StructArray; use arrow::datatypes::{DataType, Field, Fields}; -use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; @@ -27,40 +27,50 @@ use std::sync::Arc; fn named_struct_expr(args: &[ColumnarValue]) -> Result { // do not accept 0 arguments. if args.is_empty() { - return exec_err!("named_struct requires at least one pair of arguments"); + return exec_err!("named_struct requires at least one pair of arguments, got 0 instead"); } if args.len() % 2 != 0 { - return exec_err!("named_struct requires an even number of arguments"); + return exec_err!("named_struct requires an even number of arguments, got {} instead", args.len()); } - let fields = args + let (names, values): (Vec<_>, Vec<_>) = args .chunks_exact(2) - .map(|chunk| { - let name = &chunk[0]; - let value = &chunk[1]; - - if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(name))) = name { - let array_ref = match value { - ColumnarValue::Array(array) => array.clone(), - ColumnarValue::Scalar(scalar) => scalar.to_array()?.clone(), - }; - - Ok(( - Arc::new(Field::new( - name.clone(), - array_ref.data_type().clone(), - true, - )), - array_ref, - )) - } else { - exec_err!("named_struct even arguments must be string literals") - } + .enumerate() + .map(|(i, chunk)| { + + let name_column = &chunk[0]; + + let name = match name_column { + ColumnarValue::Scalar(ScalarValue::Utf8(Some(name_scalar))) => name_scalar, + _ => return exec_err!("named_struct even arguments must be string literals, got {name_column:?} instead at position {}", i * 2) + }; + + Ok((name, chunk[1].clone())) + }) + .collect::>>()? + .into_iter() + .unzip(); + + let arrays = ColumnarValue::values_to_arrays(&values)?; + + let fields = names.into_iter() + .zip(arrays) + .map(|(name, value)| { + ( + Arc::new(Field::new( + name, + value.data_type().clone(), + true, + )), + value, + ) }) - .collect::>>()?; + .collect::>(); + Ok(ColumnarValue::Array(Arc::new(StructArray::from(fields)))) } + #[derive(Debug)] pub(super) struct NamedStructFunc { signature: Signature, @@ -78,6 +88,7 @@ impl ScalarUDFImpl for NamedStructFunc { fn as_any(&self) -> &dyn Any { self } + fn name(&self) -> &str { "named_struct" } @@ -87,9 +98,7 @@ impl ScalarUDFImpl for NamedStructFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - Err(datafusion_common::DataFusionError::Internal( - "return_type called instead of return_type_from_exprs".into(), - )) + internal_err!("named_struct: return_type called instead of return_type_from_exprs") } fn return_type_from_exprs( @@ -100,23 +109,24 @@ impl ScalarUDFImpl for NamedStructFunc { ) -> Result { // do not accept 0 arguments. if args.is_empty() { - return exec_err!("named_struct requires at least one pair of arguments"); + return exec_err!("named_struct requires at least one pair of arguments, got 0 instead"); } if args.len() % 2 != 0 { - return exec_err!("named_struct requires an even number of arguments"); + return exec_err!("named_struct requires an even number of arguments, got {} instead", args.len()); } let return_fields = args .chunks_exact(2) - .map(|chunk| { + .enumerate() + .map(|(i, chunk)| { let name = &chunk[0]; let value = &chunk[1]; if let Expr::Literal(ScalarValue::Utf8(Some(name))) = name { - Ok(Field::new(name.clone(), value.get_type(schema)?, true)) + Ok(Field::new(name, value.get_type(schema)?, true)) } else { - exec_err!("named_struct even arguments must be string literals") + exec_err!("named_struct even arguments must be string literals, got {name} instead at position {}", i * 2) } }) .collect::>>()?; diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 1ab6aa01c411..5a730d504458 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -23,11 +23,12 @@ statement ok CREATE TABLE values( a INT, b FLOAT, - c VARCHAR + c VARCHAR, + n VARCHAR, ) AS VALUES - (1, 1.1, 'a'), - (2, 2.2, 'b'), - (3, 3.3, 'c') + (1, 1.1, 'a', NULL), + (2, 2.2, 'b', NULL), + (3, 3.3, 'c', NULL) ; # struct[i] @@ -50,12 +51,18 @@ select struct(1, 3.14, 'e'); ---- {c0: 1, c1: 3.14, c2: e} -# struct scalar function #1 with alias +# struct scalar function with named values query ? select struct(1 as "name0", 3.14 as name1, 'e', true as 'name3'); ---- {name0: 1, name1: 3.14, c2: e, name3: true} +# struct scalar function with mixed named and unnamed values +query ? +select struct(1, 3.14 as name1, 'e', true); +---- +{c0: 1, name1: 3.14, c2: e, c3: true} + # struct scalar function with columns #1 query ? select struct(a, b, c) from values; @@ -75,5 +82,92 @@ physical_plan ProjectionExec: expr=[named_struct(c0, a@0, c1, b@1, c2, c@2) as named_struct(Utf8("c0"),values.a,Utf8("c1"),values.b,Utf8("c2"),values.c)] --MemoryExec: partitions=1, partition_sizes=[1] +# error on 0 arguments +query error +select named_struct(); +---- +DataFusion error: Error during planning: No function matches the given name and argument types 'named_struct()'. You might need to add explicit type casts. + Candidate functions: + named_struct(Any, .., Any) + + +# error on odd number of arguments #1 +query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead +select named_struct('a'); + +# error on odd number of arguments #2 +query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead +select named_struct(1); + +# error on odd number of arguments #3 +query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead +select named_struct(values.a) from values; + +# error on odd number of arguments #4 +query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 3 instead +select named_struct('a', 1, 'b'); + +# error on even argument not a string literal #1 +query error DataFusion error: Execution error: named_struct even arguments must be string literals, got Int64\(1\) instead at position 0 +select named_struct(1, 'a'); + +# error on even argument not a string literal #2 +query error DataFusion error: Execution error: named_struct even arguments must be string literals, got Int64\(0\) instead at position 2 +select named_struct('corret', 1, 0, 'wrong'); + +# error on even argument not a string literal #3 +query error DataFusion error: Execution error: named_struct even arguments must be string literals, got values\.a instead at position 0 +select named_struct(values.a, 'a') from values; + +# error on even argument not a string literal #4 +query error DataFusion error: Execution error: named_struct even arguments must be string literals, got values\.c instead at position 0 +select named_struct(values.c, 'c') from values; + +# named_struct with mixed scalar and array values #1 +query ? +select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from values; +---- +{scalar: 27, array: 1, null: } +{scalar: 27, array: 2, null: } +{scalar: 27, array: 3, null: } + +# named_struct with mixed scalar and array values #2 +query ? +select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values; +---- +{array: 1, scalar: 27, null: } +{array: 2, scalar: 27, null: } +{array: 3, scalar: 27, null: } + +# named_struct with mixed scalar and array values #3 +query ? +select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from values; +---- +{null: , array: 1, scalar: 27} +{null: , array: 2, scalar: 27} +{null: , array: 3, scalar: 27} + +# named_struct with mixed scalar and array values #4 +query ? +select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'null', NULL) from values; +---- +{null_array: , array: 1, scalar: 27, null: } +{null_array: , array: 2, scalar: 27, null: } +{null_array: , array: 3, scalar: 27, null: } + +# named_struct arrays only +query ? +select named_struct('field_a', a, 'field_b', b) from values; +---- +{field_a: 1, field_b: 1.1} +{field_a: 2, field_b: 2.2} +{field_a: 3, field_b: 3.3} + +# named_struct scalars only +query ? +select named_struct('field_a', 1, 'field_b', 2); +---- +{field_a: 1, field_b: 2} + statement ok drop table values; From 79766d5f9b2370fe9384f29ea297380cad3a6430 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 29 Mar 2024 07:42:52 -0400 Subject: [PATCH 3/5] fix slt --- datafusion/sqllogictest/test_files/struct.slt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 056044a189ed..44fac651db93 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -92,13 +92,8 @@ ProjectionExec: expr=[named_struct(c0, a@0, c1, b@1, c2, c@2) as named_struct(Ut --MemoryExec: partitions=1, partition_sizes=[1] # error on 0 arguments -query error +query error DataFusion error: Error during planning: No function matches the given name and argument types 'named_struct\(\)'. You might need to add explicit type casts. select named_struct(); ----- -DataFusion error: Error during planning: No function matches the given name and argument types 'named_struct()'. You might need to add explicit type casts. - Candidate functions: - named_struct(Any, .., Any) - # error on odd number of arguments #1 query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead From 0c22ce6139b1fd02873551bb482fd45714926c44 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 29 Mar 2024 07:46:13 -0400 Subject: [PATCH 4/5] fmt --- datafusion/functions/src/core/named_struct.rs | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 28da91e3b159..ce8a60545ee6 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -27,11 +27,16 @@ use std::sync::Arc; fn named_struct_expr(args: &[ColumnarValue]) -> Result { // do not accept 0 arguments. if args.is_empty() { - return exec_err!("named_struct requires at least one pair of arguments, got 0 instead"); + return exec_err!( + "named_struct requires at least one pair of arguments, got 0 instead" + ); } if args.len() % 2 != 0 { - return exec_err!("named_struct requires an even number of arguments, got {} instead", args.len()); + return exec_err!( + "named_struct requires an even number of arguments, got {} instead", + args.len() + ); } let (names, values): (Vec<_>, Vec<_>) = args @@ -54,15 +59,12 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result { let arrays = ColumnarValue::values_to_arrays(&values)?; - let fields = names.into_iter() + let fields = names + .into_iter() .zip(arrays) .map(|(name, value)| { ( - Arc::new(Field::new( - name, - value.data_type().clone(), - true, - )), + Arc::new(Field::new(name, value.data_type().clone(), true)), value, ) }) @@ -98,7 +100,9 @@ impl ScalarUDFImpl for NamedStructFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - internal_err!("named_struct: return_type called instead of return_type_from_exprs") + internal_err!( + "named_struct: return_type called instead of return_type_from_exprs" + ) } fn return_type_from_exprs( @@ -109,11 +113,16 @@ impl ScalarUDFImpl for NamedStructFunc { ) -> Result { // do not accept 0 arguments. if args.is_empty() { - return exec_err!("named_struct requires at least one pair of arguments, got 0 instead"); + return exec_err!( + "named_struct requires at least one pair of arguments, got 0 instead" + ); } if args.len() % 2 != 0 { - return exec_err!("named_struct requires an even number of arguments, got {} instead", args.len()); + return exec_err!( + "named_struct requires an even number of arguments, got {} instead", + args.len() + ); } let return_fields = args From f35cddc123ff23d22cc3e3dd6d3f0469cc8d5427 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 29 Mar 2024 07:49:49 -0400 Subject: [PATCH 5/5] port test to slt --- datafusion/functions/src/core/named_struct.rs | 57 ------------------- datafusion/sqllogictest/test_files/struct.slt | 5 ++ 2 files changed, 5 insertions(+), 57 deletions(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index ce8a60545ee6..327a41baa741 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -146,60 +146,3 @@ impl ScalarUDFImpl for NamedStructFunc { named_struct_expr(args) } } - -#[cfg(test)] -mod tests { - use super::*; - use arrow::array::Int64Array; - use datafusion_common::cast::as_struct_array; - use datafusion_common::ScalarValue; - - #[test] - fn test_named_struct() { - // named_struct("first", 1, "second", 2, "third", 3) = {"first": 1, "second": 2, "third": 3} - let args = [ - ColumnarValue::Scalar(ScalarValue::Utf8(Some("first".into()))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some("second".into()))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some("third".into()))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ]; - let struc = named_struct_expr(&args) - .expect("failed to initialize function struct") - .into_array(1) - .expect("Failed to convert to array"); - let result = - as_struct_array(&struc).expect("failed to initialize function struct"); - assert_eq!( - &Int64Array::from(vec![1]), - result - .column_by_name("first") - .unwrap() - .clone() - .as_any() - .downcast_ref::() - .unwrap() - ); - assert_eq!( - &Int64Array::from(vec![2]), - result - .column_by_name("second") - .unwrap() - .clone() - .as_any() - .downcast_ref::() - .unwrap() - ); - assert_eq!( - &Int64Array::from(vec![3]), - result - .column_by_name("third") - .unwrap() - .clone() - .as_any() - .downcast_ref::() - .unwrap() - ); - } -} diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 44fac651db93..2e0b699f6dd6 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -175,3 +175,8 @@ select named_struct('field_a', 1, 'field_b', 2); statement ok drop table values; + +query T +select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3)); +---- +Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) \ No newline at end of file