From 02a38c44ab3d1313c146efecd64dd99d29167cac Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:25:54 +0800 Subject: [PATCH 1/2] add string view support for left --- datafusion/functions/src/unicode/left.rs | 43 +++++++++++++++++++----- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index 7d456f5f1e94..c49784948dd0 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -19,10 +19,15 @@ use std::any::Any; use std::cmp::Ordering; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array, + OffsetSizeTrait, +}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::exec_err; use datafusion_common::Result; use datafusion_expr::TypeSignature::Exact; @@ -46,7 +51,11 @@ impl LeftFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + vec![ + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8, Int64]), + Exact(vec![LargeUtf8, Int64]), + ], Volatility::Immutable, ), } @@ -72,9 +81,14 @@ impl ScalarUDFImpl for LeftFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(left::, vec![])(args), + DataType::Utf8 | DataType::Utf8View => { + make_scalar_function(left::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(left::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function left"), + other => exec_err!( + "Unsupported data type {other:?} for function left,\ + expected Utf8View, Utf8 or LargeUtf8." + ), } } } @@ -83,10 +97,23 @@ impl ScalarUDFImpl for LeftFunc { /// left('abcde', 2) = 'ab' /// The implementation uses UTF-8 code points as characters pub fn left(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; let n_array = as_int64_array(&args[1])?; - let result = string_array - .iter() + + if args[0].data_type() == &DataType::Utf8View { + let string_array = as_string_view_array(&args[0])?; + left_impl::(string_array, n_array) + } else { + let string_array = as_generic_string_array::(&args[0])?; + left_impl::(string_array, n_array) + } +} + +fn left_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( + string_array: V, + n_array: &Int64Array, +) -> Result { + let iter = ArrayIter::new(string_array); + let result = iter .zip(n_array.iter()) .map(|(string, n)| match (string, n) { (Some(string), Some(n)) => match n.cmp(&0) { From afd72573225c6c0199d72c141f9aa985cfb163f2 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Wed, 14 Aug 2024 13:42:29 +0800 Subject: [PATCH 2/2] add tests for stringview support of LEFT --- .../sqllogictest/test_files/string_view.slt | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 0a9b73babb96..c9c7e979faa9 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -918,6 +918,29 @@ eng (empty) ngpeng ael (empty) hael NULL NULL NULL +## Ensure no casts for LEFT +query TT +EXPLAIN SELECT + LEFT(column1_utf8view, 3) as c2 +FROM test; +---- +logical_plan +01)Projection: left(test.column1_utf8view, Int64(3)) AS c2 +02)--TableScan: test projection=[column1_utf8view] + +# Test outputs of LEFT +query TTT +SELECT + LEFT(column1_utf8view, 3) as c1, + LEFT(column1_utf8view, 0) as c2, + LEFT(column1_utf8view, -3) as c3 +FROM test; +---- +And (empty) And +Xia (empty) Xiangp +Rap (empty) Raph +NULL NULL NULL + ## Ensure no casts for RPAD ## TODO file ticket query TT