From 29731a8f8e27039563c5c236ab7453aa2c98ee37 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 29 Nov 2022 19:00:53 +0100 Subject: [PATCH] chore: prepare 0.0.11 release (#65) Bump substrait to 0.20.0 --- Cargo.lock | 10 +- README.md | 1 + c/Cargo.toml | 4 +- ci/version | 2 +- derive/Cargo.toml | 2 +- py/Cargo.toml | 4 +- py/pyproject.toml | 2 +- rs/Cargo.toml | 4 +- rs/README.md | 4 +- rs/src/output/extension/simple/function.rs | 17 +++ rs/src/parse/expressions/functions.rs | 114 ++++++++++++++------- rs/src/parse/relations/read.rs | 15 +++ rs/src/parse/sorts.rs | 1 + rs/src/resources/substrait-version | 2 +- substrait | 2 +- tests/Cargo.toml | 4 +- tests/tests/version/current-version.yaml | 2 +- 17 files changed, 132 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c736fd5a..4a7fa869 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1104,7 +1104,7 @@ dependencies = [ [[package]] name = "substrait-validator" -version = "0.0.10" +version = "0.0.11" dependencies = [ "antlr-rust", "base64", @@ -1137,7 +1137,7 @@ dependencies = [ [[package]] name = "substrait-validator-c" -version = "0.0.10" +version = "0.0.11" dependencies = [ "cbindgen", "libc", @@ -1148,7 +1148,7 @@ dependencies = [ [[package]] name = "substrait-validator-derive" -version = "0.0.10" +version = "0.0.11" dependencies = [ "heck 0.4.0", "quote", @@ -1157,7 +1157,7 @@ dependencies = [ [[package]] name = "substrait-validator-py" -version = "0.0.10" +version = "0.0.11" dependencies = [ "dunce", "prost-build", @@ -1200,7 +1200,7 @@ dependencies = [ [[package]] name = "test-runner" -version = "0.0.10" +version = "0.0.11" dependencies = [ "glob", "prost-build", diff --git a/README.md b/README.md index 26446e72..6451e17d 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ older version. Refer to the table below for the version compatibility matrix. | Substrait... | ... is supported by validator ... | | -------------- | ------------------------------------ | +| 0.20.x | 0.0.11 (current version) | | 0.19.x | 0.0.10 | | 0.18.x | 0.0.9 | | 0.9.x - 0.17.x | 0.0.8 | diff --git a/c/Cargo.toml b/c/Cargo.toml index e5a7da2a..1c87fc9c 100644 --- a/c/Cargo.toml +++ b/c/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "substrait-validator-c" -version = "0.0.10" +version = "0.0.11" edition = "2021" license = "Apache-2.0" @@ -12,7 +12,7 @@ doc = false cbindgen = "0.20.0" [dependencies] -substrait-validator = { path = "../rs", version = "0.0.10" } +substrait-validator = { path = "../rs", version = "0.0.11" } libc = "0.2" thiserror = "1.0" once_cell = "1.9" diff --git a/ci/version b/ci/version index b0a12275..58682af4 100644 --- a/ci/version +++ b/ci/version @@ -1 +1 @@ -0.0.10 \ No newline at end of file +0.0.11 \ No newline at end of file diff --git a/derive/Cargo.toml b/derive/Cargo.toml index 3370944f..4b484068 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -4,7 +4,7 @@ description = "Procedural macros for substrait-validator" homepage = "https://substrait.io/" repository = "https://github.com/substrait-io/substrait" readme = "README.md" -version = "0.0.10" +version = "0.0.11" edition = "2021" license = "Apache-2.0" diff --git a/py/Cargo.toml b/py/Cargo.toml index 2845c839..49f69ca8 100644 --- a/py/Cargo.toml +++ b/py/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "substrait-validator-py" -version = "0.0.10" +version = "0.0.11" edition = "2018" license = "Apache-2.0" include = [ @@ -29,7 +29,7 @@ name = "substrait_validator" doc = false [dependencies] -substrait-validator = { path = "../rs", version = "0.0.10" } +substrait-validator = { path = "../rs", version = "0.0.11" } pyo3 = { version = "0.17.3", features = ["extension-module"] } [build-dependencies] diff --git a/py/pyproject.toml b/py/pyproject.toml index c201f833..25a0b5d2 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,7 +5,7 @@ backend-path = ["."] [project] name = "substrait-validator" -version = "0.0.10" +version = "0.0.11" description = "Validator for Substrait query plans" readme = "README.md" license = { file = "LICENSE" } diff --git a/rs/Cargo.toml b/rs/Cargo.toml index 3dd841cd..a1640c4a 100644 --- a/rs/Cargo.toml +++ b/rs/Cargo.toml @@ -4,7 +4,7 @@ description = "Substrait validator" homepage = "https://substrait.io/" repository = "https://github.com/substrait-io/substrait" readme = "README.md" -version = "0.0.10" +version = "0.0.11" edition = "2021" license = "Apache-2.0" include = ["src", "build.rs", "README.md"] @@ -24,7 +24,7 @@ prost-types = "0.10" # Prost doesn't generate any introspection stuff, so we hack that stuff in with # our own procedural macros. -substrait-validator-derive = { path = "../derive", version = "0.0.10" } +substrait-validator-derive = { path = "../derive", version = "0.0.11" } # Google/protobuf has a funny idea about case conventions (it converts them all # over the place) and prost remaps to Rust's conventions to boot. So, to diff --git a/rs/README.md b/rs/README.md index 91ade181..14f82167 100644 --- a/rs/README.md +++ b/rs/README.md @@ -6,7 +6,7 @@ plans. ``` [dependencies] -substrait-validator = "0.0.10" +substrait-validator = "0.0.11" ``` YAML file resolution @@ -20,7 +20,7 @@ dependency: ``` [dependencies] -substrait-validator = { version = "0.0.10", features = ["curl"] } +substrait-validator = { version = "0.0.11", features = ["curl"] } ``` This adds the `substrait_validator::Config::add_curl_yaml_uri_resolver()` diff --git a/rs/src/output/extension/simple/function.rs b/rs/src/output/extension/simple/function.rs index 3521e51e..f3a95922 100644 --- a/rs/src/output/extension/simple/function.rs +++ b/rs/src/output/extension/simple/function.rs @@ -25,6 +25,9 @@ pub struct Definition { /// The expected arguments of the function. pub arguments: Vec, + /// The options of the function. + pub options: HashMap, + /// Specifies the variadic behavior of the last argument slot, if any. pub variadic: VariadicBehavior, @@ -136,6 +139,20 @@ pub struct EnumerationArgumentSlot { pub required: bool, } +/// Definition of a function option name. +#[derive(Clone, Debug)] +pub struct OptionName { + /// A human-readable name for this option. + pub name: String, +} + +/// Definition of a valid options for a function option. +#[derive(Clone, Debug)] +pub struct OptionValues { + /// A list of valid strings for this option. + pub values: Vec, +} + /// Definition of the variadic behavior of the last argument slot. #[derive(Clone, Debug)] pub struct VariadicBehavior { diff --git a/rs/src/parse/expressions/functions.rs b/rs/src/parse/expressions/functions.rs index 4bc460fd..9c54c326 100644 --- a/rs/src/parse/expressions/functions.rs +++ b/rs/src/parse/expressions/functions.rs @@ -41,8 +41,8 @@ pub enum FunctionArgument { /// Used for type arguments. Type(data::Type), - /// Used for enum option arguments. - Enum(Option), + /// Used for enum arguments. + Enum(String), } impl Default for FunctionArgument { @@ -61,8 +61,7 @@ impl Describe for FunctionArgument { FunctionArgument::Unresolved => write!(f, "!"), FunctionArgument::Value(_, e) => e.describe(f, limit), FunctionArgument::Type(t) => t.describe(f, limit), - FunctionArgument::Enum(Some(s)) => util::string::describe_identifier(f, s, limit), - FunctionArgument::Enum(None) => write!(f, "-"), + FunctionArgument::Enum(s) => util::string::describe_identifier(f, s, limit), } } } @@ -73,6 +72,16 @@ impl std::fmt::Display for FunctionArgument { } } +/// An optional function argument. Typically used for specifying behavior in +/// invalid or corner cases. +#[derive(Clone, Debug)] +pub struct FunctionOption { + /// Name of the option to set. + pub name: String, + /// List of behavior options allowed by the producer. + pub preference: Vec, +} + /// Information about the context in which a function is being called. #[derive(Clone, Debug)] pub struct FunctionContext { @@ -82,6 +91,9 @@ pub struct FunctionContext { /// The list of arguments bound to the function. pub arguments: Vec, + /// The list of optional function arguments. + pub options: Vec, + /// If known, the expected return type of the function. If not known this /// can just be set to unresolved. pub return_type: data::Type, @@ -101,17 +113,16 @@ pub struct FunctionBinding { } impl FunctionBinding { - /// Try to bind one of the provided function implementations to the - /// provided function context. + /// Try to bind one of the provided function implementations to the provided + /// function context. /// /// This is purely a validator thing. For valid plans, there should only /// ever be one implementation after name resolution, and the return type - /// should already have been specified. Much more intelligence was thrown - /// in here just to help people find and correct mistakes efficiently. - /// Common misconceptions and mistakes, like using the simple function name - /// vs. the compound name, not specifying optional arguments, or not - /// specifying the (correct) return type should yield more than just a - /// generic error message here! + /// should already have been specified. Much more intelligence was thrown in + /// here just to help people find and correct mistakes efficiently. Common + /// misconceptions and mistakes, like using the simple function name vs. the + /// compound name. or not specifying the (correct) return type should yield + /// more than just a generic error message here! pub fn new( functions: Option<&extension::simple::function::ResolutionResult>, function_context: &FunctionContext, @@ -154,36 +165,13 @@ impl FunctionBinding { } } -/// Parse an enum option argument type. -fn parse_enum_type( - x: &substrait::function_argument::r#enum::EnumKind, - _y: &mut context::Context, -) -> diagnostic::Result> { - match x { - substrait::function_argument::r#enum::EnumKind::Specified(x) => Ok(Some(x.clone())), - substrait::function_argument::r#enum::EnumKind::Unspecified(_) => Ok(None), - } -} - -/// Parse an enum option argument. -fn parse_enum( - x: &substrait::function_argument::Enum, - y: &mut context::Context, -) -> diagnostic::Result> { - Ok(proto_required_field!(x, y, enum_kind, parse_enum_type) - .1 - .flatten()) -} - /// Parse a 0.3.0+ function argument type. fn parse_function_argument_type( x: &substrait::function_argument::ArgType, y: &mut context::Context, ) -> diagnostic::Result { match x { - substrait::function_argument::ArgType::Enum(x) => { - Ok(FunctionArgument::Enum(parse_enum(x, y)?)) - } + substrait::function_argument::ArgType::Enum(x) => Ok(FunctionArgument::Enum(x.to_string())), substrait::function_argument::ArgType::Type(x) => { types::parse_type(x, y)?; Ok(FunctionArgument::Type(y.data_type())) @@ -207,6 +195,29 @@ fn parse_function_argument( ) } +fn parse_function_option( + x: &substrait::FunctionOption, + y: &mut context::Context, +) -> diagnostic::Result { + proto_primitive_field!(x, y, name); + proto_required_repeated_field!(x, y, preference); + + if x.preference.is_empty() { + let err = cause!(IllegalValue, "at least one option must be specified"); + diagnostic!(y, Error, err.clone()); + comment!( + y, + "To leave an option unspecified, simply don't add an entry to `options`" + ); + Err(err) + } else { + Ok(FunctionOption { + name: x.name.clone(), + preference: x.preference.clone(), + }) + } +} + /// Parse a pre-0.3.0 function argument expression. fn parse_legacy_function_argument( x: &substrait::Expression, @@ -214,7 +225,18 @@ fn parse_legacy_function_argument( ) -> diagnostic::Result { expressions::parse_legacy_function_argument(x, y).map(|x| match x { expressions::ExpressionOrEnum::Value(x) => FunctionArgument::Value(y.data_type(), x), - expressions::ExpressionOrEnum::Enum(x) => FunctionArgument::Enum(x), + expressions::ExpressionOrEnum::Enum(x) => match x { + Some(x) => FunctionArgument::Enum(x), + None => { + diagnostic!( + y, + Error, + Deprecation, + "support for optional enum arguments was removed in Substrait 0.20.0 (#342)" + ); + FunctionArgument::Unresolved + } + }, }) } @@ -279,6 +301,11 @@ pub fn parse_scalar_function( .into_iter() .map(|x| x.unwrap_or_default()) .collect(); + let options = proto_repeated_field!(x, y, options, parse_function_option) + .1 + .into_iter() + .flatten() + .collect(); let return_type = proto_required_field!(x, y, output_type, types::parse_type) .0 .data_type(); @@ -288,6 +315,7 @@ pub fn parse_scalar_function( let context = FunctionContext { function_type: FunctionType::Scalar, arguments, + options, return_type, }; let binding = FunctionBinding::new(functions.as_ref(), &context, y); @@ -341,6 +369,11 @@ pub fn parse_window_function( .into_iter() .map(|x| x.unwrap_or_default()) .collect(); + let options = proto_repeated_field!(x, y, options, parse_function_option) + .1 + .into_iter() + .flatten() + .collect(); let return_type = proto_required_field!(x, y, output_type, types::parse_type) .0 .data_type(); @@ -359,6 +392,7 @@ pub fn parse_window_function( let context = FunctionContext { function_type: FunctionType::Window, arguments, + options, return_type, }; let binding = FunctionBinding::new(functions.as_ref(), &context, y); @@ -395,6 +429,11 @@ pub fn parse_aggregate_function( .into_iter() .map(|x| x.unwrap_or_default()) .collect(); + let options = proto_repeated_field!(x, y, options, parse_function_option) + .1 + .into_iter() + .flatten() + .collect(); let return_type = proto_required_field!(x, y, output_type, types::parse_type) .0 .data_type(); @@ -416,6 +455,7 @@ pub fn parse_aggregate_function( let context = FunctionContext { function_type: FunctionType::Aggregate, arguments, + options, return_type, }; let binding = FunctionBinding::new(functions.as_ref(), &context, y); diff --git a/rs/src/parse/relations/read.rs b/rs/src/parse/relations/read.rs index b787cc7a..1b7e3f1d 100644 --- a/rs/src/parse/relations/read.rs +++ b/rs/src/parse/relations/read.rs @@ -339,6 +339,10 @@ pub fn parse_read_rel(x: &substrait::ReadRel, y: &mut context::Context) -> diagn // Handle filter. let filter = proto_boxed_field!(x, y, filter, expressions::parse_predicate); + // Handle best effort filter. + let best_effort_filter = + proto_boxed_field!(x, y, best_effort_filter, expressions::parse_predicate); + // Handle projection. if x.projection.is_some() { schema = @@ -367,6 +371,17 @@ pub fn parse_read_rel(x: &substrait::ReadRel, y: &mut context::Context) -> diagn ); } + // Add best effort filter summary. + if let (Some(best_effort_filter_node), Some(best_effort_filter_expr)) = best_effort_filter { + let nullable = best_effort_filter_node.data_type().nullable(); + summary!( + y, + "This relation may discards all rows for which the expression {} yields {}.", + best_effort_filter_expr, + if nullable { "false or null" } else { "false" } + ); + } + // Handle the common field. handle_rel_common!(x, y); diff --git a/rs/src/parse/sorts.rs b/rs/src/parse/sorts.rs index 0302f995..3961dba0 100644 --- a/rs/src/parse/sorts.rs +++ b/rs/src/parse/sorts.rs @@ -72,6 +72,7 @@ fn parse_comparison_function_reference( let context = expressions::functions::FunctionContext { function_type: expressions::functions::FunctionType::Scalar, arguments: vec![argument.clone(), argument], + options: vec![], return_type: data::new_unresolved_type(), }; let binding = expressions::functions::FunctionBinding::new(Some(&functions), &context, y); diff --git a/rs/src/resources/substrait-version b/rs/src/resources/substrait-version index 3f46c4d1..a881cf79 100644 --- a/rs/src/resources/substrait-version +++ b/rs/src/resources/substrait-version @@ -1 +1 @@ -0.19.0 \ No newline at end of file +0.20.0 \ No newline at end of file diff --git a/substrait b/substrait index 9d4c4d0d..6d7936df 160000 --- a/substrait +++ b/substrait @@ -1 +1 @@ -Subproject commit 9d4c4d0dae179e7b1d91ee382f3e578af0015d08 +Subproject commit 6d7936df1dd369b28aa1f48da06a53c0c605d39f diff --git a/tests/Cargo.toml b/tests/Cargo.toml index d4fff455..59012e64 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "test-runner" -version = "0.0.10" +version = "0.0.11" edition = "2018" license = "Apache-2.0" default-run = "runner" @@ -14,7 +14,7 @@ name = "find_protoc" path = "src/find_protoc.rs" [dependencies] -substrait-validator = { path = "../rs", version = "0.0.10" } +substrait-validator = { path = "../rs", version = "0.0.11" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" walkdir = "2" diff --git a/tests/tests/version/current-version.yaml b/tests/tests/version/current-version.yaml index 37425414..007c9fdb 100644 --- a/tests/tests/version/current-version.yaml +++ b/tests/tests/version/current-version.yaml @@ -6,5 +6,5 @@ plan: __test: - level: i version: - minorNumber: 19 + minorNumber: 20 producer: validator-test