From bcf5a764e59446732f8a2b6e258fdb717d8270a8 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sun, 29 Sep 2024 11:54:23 -0400 Subject: [PATCH] Refactored Documentation to allow it to be used in a const. --- datafusion-cli/Cargo.lock | 2 - .../src/bin/print_aggregate_functions_docs.rs | 4 +- .../src/bin/print_scalar_functions_docs.rs | 4 +- .../src/bin/print_window_functions_docs.rs | 6 +- datafusion/expr/Cargo.toml | 1 - datafusion/expr/src/udaf.rs | 21 ++--- datafusion/expr/src/udf.rs | 23 +++-- datafusion/expr/src/udf_docs.rs | 12 +-- datafusion/expr/src/udwf.rs | 21 ++--- .../functions-aggregate/src/bit_and_or_xor.rs | 85 +++++++++-------- datafusion/functions-window/src/row_number.rs | 21 ++--- datafusion/functions/Cargo.toml | 1 - datafusion/functions/src/core/coalesce.rs | 31 +++---- datafusion/functions/src/crypto/sha224.rs | 33 +++---- datafusion/functions/src/datetime/to_date.rs | 91 +++++++++---------- datafusion/functions/src/encoding/inner.rs | 53 ++++++----- datafusion/functions/src/math/log.rs | 41 ++++----- datafusion/functions/src/regex/regexplike.rs | 72 +++++++-------- datafusion/functions/src/string/ascii.rs | 31 +++---- datafusion/functions/src/unicode/rpad.rs | 47 +++++----- 20 files changed, 295 insertions(+), 305 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 179d410e185e..6d4eeee97675 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1345,7 +1345,6 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap", "paste", "serde_json", "sqlparser", @@ -1377,7 +1376,6 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", - "indexmap", "itertools", "log", "md-5", diff --git a/datafusion/core/src/bin/print_aggregate_functions_docs.rs b/datafusion/core/src/bin/print_aggregate_functions_docs.rs index 9f1661cfd6a6..83fe99aa3368 100644 --- 
a/datafusion/core/src/bin/print_aggregate_functions_docs.rs +++ b/datafusion/core/src/bin/print_aggregate_functions_docs.rs @@ -92,7 +92,7 @@ fn main() { ); // next, arguments - if let Some(args) = &documentation.arguments { + if let Some(args) = documentation.arguments { let _ = writeln!(&mut docs, "#### Arguments\n"); for (arg_name, arg_desc) in args { let _ = writeln!(&mut docs, "- **{arg_name}**: {arg_desc}"); @@ -122,7 +122,7 @@ fn main() { } // finally, any related udfs - if let Some(related_udfs) = &documentation.related_udfs { + if let Some(related_udfs) = documentation.related_udfs { let _ = writeln!(&mut docs, "\n**Related functions**:"); for related in related_udfs { diff --git a/datafusion/core/src/bin/print_scalar_functions_docs.rs b/datafusion/core/src/bin/print_scalar_functions_docs.rs index b96b42e15948..951500b5f02b 100644 --- a/datafusion/core/src/bin/print_scalar_functions_docs.rs +++ b/datafusion/core/src/bin/print_scalar_functions_docs.rs @@ -92,7 +92,7 @@ fn main() { ); // next, arguments - if let Some(args) = &documentation.arguments { + if let Some(args) = documentation.arguments { let _ = writeln!(&mut docs, "#### Arguments\n"); for (arg_name, arg_desc) in args { let _ = writeln!(&mut docs, "- **{arg_name}**: {arg_desc}"); @@ -122,7 +122,7 @@ fn main() { } // finally, any related udfs - if let Some(related_udfs) = &documentation.related_udfs { + if let Some(related_udfs) = documentation.related_udfs { let _ = writeln!(&mut docs, "\n**Related functions**:"); for related in related_udfs { diff --git a/datafusion/core/src/bin/print_window_functions_docs.rs b/datafusion/core/src/bin/print_window_functions_docs.rs index 272f423af2dc..8a2f793393f5 100644 --- a/datafusion/core/src/bin/print_window_functions_docs.rs +++ b/datafusion/core/src/bin/print_window_functions_docs.rs @@ -92,7 +92,7 @@ fn main() { ); // next, arguments - if let Some(args) = &documentation.arguments { + if let Some(args) = documentation.arguments { let _ = writeln!(&mut 
docs, "#### Arguments\n"); for (arg_name, arg_desc) in args { let _ = writeln!(&mut docs, "- **{arg_name}**: {arg_desc}"); @@ -122,10 +122,10 @@ fn main() { } // finally, any related udfs - if let Some(related_udfs) = &documentation.related_udfs { + if let Some(related_udfs) = documentation.related_udfs { let _ = writeln!(&mut docs, "\n**Related functions**:"); - for related in related_udfs { + for &related in related_udfs { let _ = writeln!(&mut docs, "- [{related}](#{related})"); } } diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index d7dc1afe4d50..55387fea22ee 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -48,7 +48,6 @@ datafusion-expr-common = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } -indexmap = { workspace = true } paste = "^1.0" serde_json = { workspace = true } sqlparser = { workspace = true } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 7aeeff3799fa..24fa89e45cb1 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -285,30 +285,29 @@ where /// # use datafusion_expr::window_doc_sections::DOC_SECTION_AGGREGATE; /// # use arrow::datatypes::Schema; /// # use arrow::datatypes::Field; -/// # use indexmap::IndexMap; /// /// #[derive(Debug, Clone)] /// struct GeoMeanUdf { /// signature: Signature, -/// documentation: Documentation, /// } /// /// impl GeoMeanUdf { /// fn new() -> Self { /// Self { /// signature: Signature::uniform(1, vec![DataType::Float64], Volatility::Immutable), -/// documentation: Documentation { -/// doc_section: DOC_SECTION_AGGREGATE, -/// description: "calculates a geometric mean", -/// syntax_example: "geo_mean(2.0)", -/// sql_example: None, -/// arguments: Some(IndexMap::from([("arg_1", "The Float64 number for the geometric mean")])), -/// related_udfs: None, -/// } /// } /// } /// 
} /// +/// const DOCUMENTATION: Documentation = Documentation { +/// doc_section: DOC_SECTION_AGGREGATE, +/// description: "calculates a geometric mean", +/// syntax_example: "geo_mean(2.0)", +/// sql_example: None, +/// arguments: Some(&[("arg_1", "The Float64 number for the geometric mean")]), +/// related_udfs: None, +/// }; +/// /// /// Implement the AggregateUDFImpl trait for GeoMeanUdf /// impl AggregateUDFImpl for GeoMeanUdf { /// fn as_any(&self) -> &dyn Any { self } @@ -329,7 +328,7 @@ where /// ]) /// } /// fn documentation(&self) -> &Documentation { -/// &self.documentation +/// &DOCUMENTATION /// } /// } /// diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index b059b0419760..36cc8f64d8a9 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -307,7 +307,6 @@ where /// ``` /// # use std::any::Any; /// # use arrow::datatypes::DataType; -/// # use indexmap::IndexMap; /// # use datafusion_common::{DataFusionError, plan_err, Result}; /// # use datafusion_expr::{col, ColumnarValue, Documentation, Signature, Volatility}; /// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF}; @@ -316,25 +315,25 @@ where /// #[derive(Debug)] /// struct AddOne { /// signature: Signature, -/// documentation: Documentation, /// } /// /// impl AddOne { /// fn new() -> Self { /// Self { /// signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable), -/// documentation: Documentation { -/// doc_section: DOC_SECTION_MATH, -/// description: "Add one to an int32", -/// syntax_example: "add_one(2)", -/// sql_example: None, -/// arguments: Some(IndexMap::from([("arg_1", "The int32 number to add one to")])), -/// related_udfs: None, -/// } /// } /// } /// } -/// +/// +/// const DOCUMENTATION: Documentation = Documentation { +/// doc_section: DOC_SECTION_MATH, +/// description: "Add one to an int32", +/// syntax_example: "add_one(2)", +/// sql_example: None, +/// arguments: Some(&[("arg_1", "The int32 number to add one to")]), 
+/// related_udfs: None, +/// }; +/// /// /// Implement the ScalarUDFImpl trait for AddOne /// impl ScalarUDFImpl for AddOne { /// fn as_any(&self) -> &dyn Any { self } @@ -349,7 +348,7 @@ where /// // The actual implementation would add one to the argument /// fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { unimplemented!() } /// fn documentation(&self) -> &Documentation { -/// &self.documentation +/// &DOCUMENTATION /// } /// } /// diff --git a/datafusion/expr/src/udf_docs.rs b/datafusion/expr/src/udf_docs.rs index 2deb48ffdd43..faf6492c11eb 100644 --- a/datafusion/expr/src/udf_docs.rs +++ b/datafusion/expr/src/udf_docs.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -use indexmap::IndexMap; - /// Documentation for use by [`crate::ScalarUDFImpl`], /// [`crate::AggregateUDFImpl`] and [`crate::WindowUDFImpl`] functions /// that will be used to generate public documentation. @@ -41,15 +39,15 @@ pub struct Documentation { /// query and output. It is strongly recommended to provide an /// example for anything but the most basic UDF's pub sql_example: Option<&'static str>, - /// arguments for the UDF which will be displayed in insertion - /// order. Key is the argument name, value is a description for - /// the argument - pub arguments: Option<IndexMap<&'static str, &'static str>>, + /// arguments for the UDF which will be displayed in array order. + /// Left member of a pair is the argument name, right is a + /// description for the argument + pub arguments: Option<&'static [(&'static str, &'static str)]>, /// related functions if any. Values should match the related /// udf's name exactly. 
Related udf's must be of the same /// UDF type (scalar, aggregate or window) for proper linking to /// occur - pub related_udfs: Option<Vec<&'static str>>, + pub related_udfs: Option<&'static [&'static str]>, } #[derive(Debug, Clone, PartialEq)] diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 0471c0788d32..7f9c6a5173c9 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -217,30 +217,29 @@ where /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; /// # use datafusion_expr::window_doc_sections::DOC_SECTION_ANALYTICAL; /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; -/// # use indexmap::IndexMap; /// /// #[derive(Debug, Clone)] /// struct SmoothIt { /// signature: Signature, -/// documentation: Documentation, /// } /// /// impl SmoothIt { /// fn new() -> Self { /// Self { /// signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable), -/// documentation: Documentation { -/// doc_section: DOC_SECTION_ANALYTICAL, -/// description: "smooths the windows", -/// syntax_example: "smooth_it(2)", -/// sql_example: None, -/// arguments: Some(IndexMap::from([("arg_1", "The int32 number to smooth by")])), -/// related_udfs: None, -/// } /// } /// } /// } /// +/// const DOCUMENTATION: Documentation = Documentation { +/// doc_section: DOC_SECTION_ANALYTICAL, +/// description: "smooths the windows", +/// syntax_example: "smooth_it(2)", +/// sql_example: None, +/// arguments: Some(&[("arg_1", "The int32 number to smooth by")]), +/// related_udfs: None, +/// }; + /// /// Implement the WindowUDFImpl trait for SmoothIt /// impl WindowUDFImpl for SmoothIt { /// fn as_any(&self) -> &dyn Any { self } @@ -256,7 +255,7 @@ where /// } /// } /// fn documentation(&self) -> &Documentation { -/// &self.documentation +/// &DOCUMENTATION /// } /// } /// diff --git a/datafusion/functions-aggregate/src/bit_and_or_xor.rs b/datafusion/functions-aggregate/src/bit_and_or_xor.rs index 4307a6d68f5d..48d2b9646cc7 100644 --- 
a/datafusion/functions-aggregate/src/bit_and_or_xor.rs +++ b/datafusion/functions-aggregate/src/bit_and_or_xor.rs @@ -17,7 +17,6 @@ //! Defines `BitAnd`, `BitOr`, `BitXor` and `BitXor DISTINCT` aggregate accumulators -use indexmap::IndexMap; use std::any::Any; use std::collections::HashSet; use std::fmt::{Display, Formatter}; @@ -134,59 +133,65 @@ macro_rules! make_bitwise_udaf_expr_and_func { }; } +const BIT_AND_DOC: Documentation = Documentation { + doc_section: DOC_SECTION_GENERAL, + description: "Computes the bitwise AND of all non-null input values.", + syntax_example: "bit_and(expression)", + sql_example: None, + arguments: Some(&[ + ( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", + ), + ]), + related_udfs: None, +}; + +const BIT_OR_DOC: Documentation = Documentation { + doc_section: DOC_SECTION_GENERAL, + description: "Computes the bitwise OR of all non-null input values.", + syntax_example: "bit_or(expression)", + sql_example: None, + arguments: Some(&[ + ( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", + ), + ]), + related_udfs: None, +}; + +const BIT_XOR_DOC: Documentation = Documentation { + doc_section: DOC_SECTION_GENERAL, + description: "Computes the bitwise exclusive OR of all non-null input values.", + syntax_example: "bit_xor(expression)", + sql_example: None, + arguments: Some(&[ + ( + "expression", + "Expression to operate on. 
Can be a constant, column, or function, and any combination of arithmetic operators.", + ), + ]), + related_udfs: None, +}; + make_bitwise_udaf_expr_and_func!( bit_and, bit_and_udaf, BitwiseOperationType::And, - Documentation { - doc_section: DOC_SECTION_GENERAL, - description: "Computes the bitwise AND of all non-null input values.", - syntax_example: "bit_and(expression)", - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", - ), - ])), - related_udfs: None, - } + BIT_AND_DOC ); make_bitwise_udaf_expr_and_func!( bit_or, bit_or_udaf, BitwiseOperationType::Or, - Documentation { - doc_section: DOC_SECTION_GENERAL, - description: "Computes the bitwise OR of all non-null input values.", - syntax_example: "bit_or(expression)", - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", - ), - ])), - related_udfs: None, - } + BIT_OR_DOC ); make_bitwise_udaf_expr_and_func!( bit_xor, bit_xor_udaf, BitwiseOperationType::Xor, - Documentation { - doc_section: DOC_SECTION_GENERAL, - description: "Computes the bitwise exclusive OR of all non-null input values.", - syntax_example: "bit_xor(expression)", - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.", - ), - ])), - related_udfs: None, - } + BIT_XOR_DOC ); /// The different types of bitwise operations that can be performed. 
diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index a50939639dc2..46e239203ff0 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -60,7 +60,6 @@ pub fn row_number_udwf() -> std::sync::Arc { #[derive(Debug)] pub struct RowNumber { signature: Signature, - documentation: Documentation, } impl RowNumber { @@ -68,15 +67,6 @@ impl RowNumber { pub fn new() -> Self { Self { signature: Signature::any(0, Volatility::Immutable), - documentation: Documentation { - doc_section: DOC_SECTION_RANKING, - description: - "Number of the current row within its partition, counting from 1.", - syntax_example: "row_number()", - sql_example: None, - arguments: None, - related_udfs: None, - }, } } } @@ -87,6 +77,15 @@ impl Default for RowNumber { } } +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_RANKING, + description: "Number of the current row within its partition, counting from 1.", + syntax_example: "row_number()", + sql_example: None, + arguments: None, + related_udfs: None, +}; + impl WindowUDFImpl for RowNumber { fn as_any(&self) -> &dyn Any { self @@ -116,7 +115,7 @@ impl WindowUDFImpl for RowNumber { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } } diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 0b21be6821b0..ff1b926a9b82 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -76,7 +76,6 @@ datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } hashbrown = { workspace = true, optional = true } hex = { version = "0.4", optional = true } -indexmap = { workspace = true } itertools = { workspace = true } log = { workspace = true } md-5 = { version = "^0.10.0", optional = true } diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 
d2d4b04872e4..c0859bd4ae10 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -26,13 +26,11 @@ use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL; use datafusion_expr::type_coercion::binary::type_union_resolution; use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use indexmap::IndexMap; use itertools::Itertools; #[derive(Debug)] pub struct CoalesceFunc { signature: Signature, - documentation: Documentation, } impl Default for CoalesceFunc { @@ -45,23 +43,24 @@ impl CoalesceFunc { pub fn new() -> Self { Self { signature: Signature::user_defined(Volatility::Immutable), - documentation: Documentation { - doc_section: DOC_SECTION_CONDITIONAL, - description: "Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.", - syntax_example: "coalesce(expression1[, ..., expression_n])", - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "expression1, expression_n", - "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary." - ), - ])), - related_udfs: None, - }, } } } +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_CONDITIONAL, + description: "Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.", + syntax_example: "coalesce(expression1[, ..., expression_n])", + sql_example: None, + arguments: Some(&[ + ( + "expression1, expression_n", + "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. 
Pass as many expression arguments as necessary." + ), + ]), + related_udfs: None, +}; + impl ScalarUDFImpl for CoalesceFunc { fn as_any(&self) -> &dyn Any { self @@ -158,7 +157,7 @@ impl ScalarUDFImpl for CoalesceFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } } diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs index 813f51aef335..6104f3a98d34 100644 --- a/datafusion/functions/src/crypto/sha224.rs +++ b/datafusion/functions/src/crypto/sha224.rs @@ -23,14 +23,13 @@ use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; -use indexmap::IndexMap; use std::any::Any; #[derive(Debug)] pub struct SHA224Func { signature: Signature, - documentation: Documentation, } + impl Default for SHA224Func { fn default() -> Self { Self::new() @@ -46,22 +45,24 @@ impl SHA224Func { vec![Utf8, LargeUtf8, Binary, LargeBinary], Volatility::Immutable, ), - documentation: Documentation { - doc_section: DOC_SECTION_HASHING, - description: "Computes the SHA-224 hash of a binary string.", - syntax_example: "sha224(expression)", - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "expression", - "String expression to operate on. Can be a constant, column, or function, and any combination of string operators." - ), - ])), - related_udfs: None, - } } } } + +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_HASHING, + description: "Computes the SHA-224 hash of a binary string.", + syntax_example: "sha224(expression)", + sql_example: None, + arguments: Some(&[ + ( + "expression", + "String expression to operate on. Can be a constant, column, or function, and any combination of string operators." 
+ ), + ]), + related_udfs: None, +}; + impl ScalarUDFImpl for SHA224Func { fn as_any(&self) -> &dyn Any { self @@ -84,6 +85,6 @@ impl ScalarUDFImpl for SHA224Func { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } } diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index 5b72f7d30705..f0b53b965dc1 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -28,12 +28,10 @@ use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; -use indexmap::IndexMap; #[derive(Debug)] pub struct ToDateFunc { signature: Signature, - documentation: Documentation, } impl Default for ToDateFunc { @@ -46,49 +44,6 @@ impl ToDateFunc { pub fn new() -> Self { Self { signature: Signature::variadic_any(Volatility::Immutable), - documentation: Documentation { - doc_section: DOC_SECTION_DATETIME, - description: r#"Converts a value to a date (`YYYY-MM-DD`). -Supports strings, integer and double types as input. -Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. -Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`). -Returns the corresponding date. - -Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`. 
-"#, - syntax_example: "to_date('2017-05-31', '%Y-%m-%d')", - sql_example: Some( - r#"```sql -> select to_date('2023-01-31'); -+-----------------------------+ -| to_date(Utf8("2023-01-31")) | -+-----------------------------+ -| 2023-01-31 | -+-----------------------------+ -> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d'); -+---------------------------------------------------------------+ -| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) | -+---------------------------------------------------------------+ -| 2023-01-31 | -+---------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs) -"#), - arguments: Some(IndexMap::from([ - ( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ), - ( - "format_n", - "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned.", - ) - ])), - related_udfs: None, - } } } @@ -124,6 +79,50 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo } } +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_DATETIME, + description: r#"Converts a value to a date (`YYYY-MM-DD`). +Supports strings, integer and double types as input. +Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. +Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`). +Returns the corresponding date. 
+ +Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`. +"#, + syntax_example: "to_date('2017-05-31', '%Y-%m-%d')", + sql_example: Some( + r#"```sql +> select to_date('2023-01-31'); ++-----------------------------+ +| to_date(Utf8("2023-01-31")) | ++-----------------------------+ +| 2023-01-31 | ++-----------------------------+ +> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d'); ++---------------------------------------------------------------+ +| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) | ++---------------------------------------------------------------+ +| 2023-01-31 | ++---------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs) +"#), + arguments: Some(&[ + ( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + ( + "format_n", + "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. 
If none of the formats successfully parse the expression + an error will be returned.", + ) + ]), + related_udfs: None, +}; + impl ScalarUDFImpl for ToDateFunc { fn as_any(&self) -> &dyn Any { self @@ -166,7 +165,7 @@ impl ScalarUDFImpl for ToDateFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } } diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index ba66f554d219..f41a10f6ace7 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -34,13 +34,11 @@ use std::{fmt, str::FromStr}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_BINARY_STRING; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use indexmap::IndexMap; use std::any::Any; #[derive(Debug)] pub struct EncodeFunc { signature: Signature, - documentation: Documentation, } impl Default for EncodeFunc { @@ -53,21 +51,22 @@ impl EncodeFunc { pub fn new() -> Self { Self { signature: Signature::user_defined(Volatility::Immutable), - documentation: Documentation { - doc_section: DOC_SECTION_BINARY_STRING, - description: "Encode binary data into a textual representation.", - syntax_example: "encode(expression, format)", - sql_example: None, - arguments: Some(IndexMap::from([ - ("expression", "Expression containing string or binary data"), - ("format", "Supported formats are: `base64`, `hex`"), - ])), - related_udfs: Some(vec!["decode"]), - }, } } } +const ENCODE_DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_BINARY_STRING, + description: "Encode binary data into a textual representation.", + syntax_example: "encode(expression, format)", + sql_example: None, + arguments: Some(&[ + ("expression", "Expression containing string or binary data"), + ("format", "Supported formats are: `base64`, `hex`"), + ]), + related_udfs: Some(&["decode"]), +}; + impl ScalarUDFImpl for EncodeFunc { fn as_any(&self) -> &dyn Any { self @@ -116,14 
+115,13 @@ impl ScalarUDFImpl for EncodeFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &ENCODE_DOCUMENTATION } } #[derive(Debug)] pub struct DecodeFunc { signature: Signature, - documentation: Documentation, } impl Default for DecodeFunc { @@ -136,21 +134,22 @@ impl DecodeFunc { pub fn new() -> Self { Self { signature: Signature::user_defined(Volatility::Immutable), - documentation: Documentation { - doc_section: DOC_SECTION_BINARY_STRING, - description: "Decode binary data from textual representation in string.", - syntax_example: "decode(expression, format)", - sql_example: None, - arguments: Some(IndexMap::from([ - ("expression", "Expression containing encoded string data"), - ("format", "Same arguments as [encode](#encode)"), - ])), - related_udfs: Some(vec!["encode"]), - }, } } } +const DECODE_DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_BINARY_STRING, + description: "Decode binary data from textual representation in string.", + syntax_example: "decode(expression, format)", + sql_example: None, + arguments: Some(&[ + ("expression", "Expression containing encoded string data"), + ("format", "Same arguments as [encode](#encode)"), + ]), + related_udfs: Some(&["encode"]), +}; + impl ScalarUDFImpl for DecodeFunc { fn as_any(&self) -> &dyn Any { self @@ -199,7 +198,7 @@ impl ScalarUDFImpl for DecodeFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &DECODE_DOCUMENTATION } } diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 66869e803886..5925b9a47bf2 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -36,12 +36,10 @@ use datafusion_expr::{ lit, ColumnarValue, Documentation, Expr, ScalarUDF, TypeSignature::*, }; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use indexmap::IndexMap; #[derive(Debug)] pub struct LogFunc { signature: Signature, - documentation: Documentation, } impl 
Default for LogFunc { @@ -50,6 +48,25 @@ impl Default for LogFunc { } } +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_MATH, + description: "Returns the base-x logarithm of a number. Can either provide a specified base, or if omitted then takes the base-10 of a number.", + syntax_example: r#"log(base, numeric_expression) +log(numeric_expression)"#, + sql_example: None, + arguments: Some(&[ + ( + "base", + "Base numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + ( + "numeric_expression", + "Numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + ]), + related_udfs: None, +}; + impl LogFunc { pub fn new() -> Self { use DataType::*; @@ -63,24 +80,6 @@ impl LogFunc { ], Volatility::Immutable, ), - documentation: Documentation { - doc_section: DOC_SECTION_MATH, - description: "Returns the base-x logarithm of a number. Can either provide a specified base, or if omitted then takes the base-10 of a number.", - syntax_example: r#"log(base, numeric_expression) -log(numeric_expression)"#, - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "base", - "Base numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ), - ( - "numeric_expression", - "Numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." 
- ), - ])), - related_udfs: None, - } } } } @@ -188,7 +187,7 @@ impl ScalarUDFImpl for LogFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } /// Simplify the `log` function by the relevant rules: diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 9e74a86f1e52..41349e6e80df 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -29,40 +29,26 @@ use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use indexmap::IndexMap; use std::any::Any; use std::sync::Arc; #[derive(Debug)] pub struct RegexpLikeFunc { signature: Signature, - documentation: Documentation, } + impl Default for RegexpLikeFunc { fn default() -> Self { Self::new() } } -impl RegexpLikeFunc { - pub fn new() -> Self { - use DataType::*; - Self { - signature: Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![Utf8, Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8, Utf8]), - ], - Volatility::Immutable, - ), - documentation: Documentation { - doc_section: DOC_SECTION_REGEX, - description: "Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.", - syntax_example: "regexp_like(str, regexp[, flags])", - sql_example: Some( - r#"```sql +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_REGEX, + description: "Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.", + syntax_example: "regexp_like(str, regexp[, flags])", + sql_example: Some( + r#"```sql select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); +--------------------------------------------------------+ | 
regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | @@ -78,25 +64,39 @@ SELECT regexp_like('aBc', '(b|d)', 'i'); ``` Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) "#), - arguments: Some(IndexMap::from([ - ( - "str", - "String expression to operate on. Can be a constant, column, or function, and any combination of string operators." - ), - ( "regexp", - "Regular expression to test against the string expression. Can be a constant, column, or function." - ), - ("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + arguments: Some(&[ + ( + "str", + "String expression to operate on. Can be a constant, column, or function, and any combination of string operators." + ), + ( "regexp", + "Regular expression to test against the string expression. Can be a constant, column, or function." + ), + ("flags", + r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - **i**: case-insensitive: letters match both upper and lower case - **m**: multi-line mode: ^ and $ match begin/end of line - **s**: allow . 
to match \n - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - **U**: swap the meaning of x* and x*?"# - ) - ])), - related_udfs: None, - } + ) + ]), + related_udfs: None, +}; + +impl RegexpLikeFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8]), + Exact(vec![LargeUtf8, Utf8]), + Exact(vec![Utf8, Utf8, Utf8]), + Exact(vec![LargeUtf8, Utf8, Utf8]), + ], + Volatility::Immutable, + ), } } } @@ -150,7 +150,7 @@ impl ScalarUDFImpl for RegexpLikeFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } } fn regexp_like_func(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index ca09ada0fed6..3e0b32180472 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -23,14 +23,26 @@ use datafusion_common::{internal_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use indexmap::IndexMap; use std::any::Any; use std::sync::Arc; +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_STRING, + description: "Returns the ASCII value of the first character in a string.", + syntax_example: "ascii(str)", + sql_example: None, + arguments: Some(&[ + ( + "str", + "String expression to operate on. Can be a constant, column, or function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a Utf8View." 
+ ) + ]), + related_udfs: Some(&["chr"]), +}; + #[derive(Debug)] pub struct AsciiFunc { signature: Signature, - documentation: Documentation, } impl Default for AsciiFunc { @@ -48,19 +60,6 @@ impl AsciiFunc { vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), - documentation: Documentation { - doc_section: DOC_SECTION_STRING, - description: "Returns the ASCII value of the first character in a string.", - syntax_example: "ascii(str)", - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "str", - "String expression to operate on. Can be a constant, column, or function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a Utf8View." - ) - ])), - related_udfs: Some(vec!["chr"]), - }, } } } @@ -89,7 +88,7 @@ impl ScalarUDFImpl for AsciiFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } } diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index 1867e7cfecae..e72490e7afb7 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -30,7 +30,6 @@ use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; -use indexmap::IndexMap; use std::any::Any; use std::fmt::Write; use std::sync::Arc; @@ -40,7 +39,6 @@ use DataType::{LargeUtf8, Utf8, Utf8View}; #[derive(Debug)] pub struct RPadFunc { signature: Signature, - documentation: Documentation, } impl Default for RPadFunc { @@ -49,6 +47,28 @@ impl Default for RPadFunc { } } +const DOCUMENTATION: Documentation = Documentation { + doc_section: DOC_SECTION_STRING, + description: "Pads the right side of a string with another string to a specified string length.", + syntax_example: "rpad(str, n[, padding_str])", + sql_example: None, + arguments: Some(&[ + ( + "str", + "String expression to operate on. Can be a constant, column, or function, and any combination of string operators." 
+ ), + ( + "n", + "String length to pad to." + ), + ( + "padding_str", + "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._" + ), + ]), + related_udfs: Some(&["lpad"]), +}; + impl RPadFunc { pub fn new() -> Self { use DataType::*; @@ -70,27 +90,6 @@ impl RPadFunc { ], Volatility::Immutable, ), - documentation: Documentation { - doc_section: DOC_SECTION_STRING, - description: "Pads the right side of a string with another string to a specified string length.", - syntax_example: "rpad(str, n[, padding_str])", - sql_example: None, - arguments: Some(IndexMap::from([ - ( - "str", - "String expression to operate on. Can be a constant, column, or function, and any combination of string operators." - ), - ( - "n", - "String length to pad to." - ), - ( - "padding_str", - "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._" - ), - ])), - related_udfs: Some(vec!["lpad"]), - }, } } } @@ -141,7 +140,7 @@ impl ScalarUDFImpl for RPadFunc { } fn documentation(&self) -> &Documentation { - &self.documentation + &DOCUMENTATION } }