From 5709f3b91a5bd9082e57106b570d9ba67f76091b Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Fri, 23 Aug 2024 11:19:46 -0700 Subject: [PATCH] feat: support upper and lower for stringview --- datafusion/functions/src/string/common.rs | 23 +++++++++++- datafusion/functions/src/string/lower.rs | 2 +- datafusion/functions/src/string/upper.rs | 2 +- .../sqllogictest/test_files/string_view.slt | 36 +++++++++++++++---- 4 files changed, 54 insertions(+), 9 deletions(-) diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 6f23a5ddd2369..a5dc22b4d9e4f 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use arrow::array::{ new_null_array, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ArrayRef, GenericStringArray, GenericStringBuilder, OffsetSizeTrait, StringArray, - StringViewArray, + StringBuilder, StringViewArray, }; use arrow::buffer::{Buffer, MutableBuffer, NullBuffer}; use arrow::datatypes::DataType; @@ -214,6 +214,23 @@ where i64, _, >(array, op)?)), + DataType::Utf8View => { + let string_array = as_string_view_array(array)?; + let mut string_builder = StringBuilder::with_capacity( + string_array.len(), + string_array.get_array_memory_size(), + ); + + for str in string_array.iter() { + if let Some(str) = str { + string_builder.append_value(op(str)); + } else { + string_builder.append_null(); + } + } + + Ok(ColumnarValue::Array(Arc::new(string_builder.finish()))) + } other => exec_err!("Unsupported data type {other:?} for function {name}"), }, ColumnarValue::Scalar(scalar) => match scalar { @@ -225,6 +242,10 @@ where let result = a.as_ref().map(|x| op(x)); Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result))) } + ScalarValue::Utf8View(a) => { + let result = a.as_ref().map(|x| op(x)); + Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result))) + } other => exec_err!("Unsupported data type {other:?} for function {name}"), }, } diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 29ca682c380be..ca324e69c0d23 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -43,7 +43,7 @@ impl LowerFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index da31948fbcfab..593e33ab6bb48 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -40,7 +40,7 @@ impl UpperFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 3b3d7b88a4a1f..8c1408649357c 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -460,8 +460,6 @@ Xiangpeng Raphael NULL - - ### Initcap query TT @@ -478,7 +476,7 @@ statement ok CREATE TABLE test_lowercase AS SELECT lower(column1_utf8) as column1_utf8_lower, lower(column1_large_utf8) as column1_large_utf8_lower, - lower(column1_utf8view) as column1_utf8view_lower + arrow_cast(lower(column1_utf8), 'Utf8View') as column1_utf8view_lower FROM test; # Test INITCAP with utf8view, utf8, and largeutf8 @@ -501,7 +499,7 @@ SELECT INITCAP(column1_large_utf8_lower) as c3 FROM test_lowercase; ---- -Andrew Andrew Andrew +Andrew Andrew Andrew Xiangpeng Xiangpeng Xiangpeng Raphael Raphael Raphael NULL NULL NULL @@ -828,16 +826,42 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LOWER -## TODO https://github.com/apache/datafusion/issues/11855 query TT EXPLAIN SELECT LOWER(column1_utf8view) as c1 FROM test; ---- logical_plan -01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: lower(test.column1_utf8view) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +query T +SELECT LOWER(column1_utf8view) as c1 +FROM test; +---- +andrew +xiangpeng +raphael +NULL + +## Ensure no casts for UPPER +query TT +EXPLAIN SELECT + UPPER(column1_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: upper(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view] +query T +SELECT UPPER(column1_utf8view) as c1 +FROM test; +---- +ANDREW +XIANGPENG +RAPHAEL +NULL ## Ensure no casts for LPAD query TT