-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support binary temporal arithmetic with integers #13741
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -186,6 +186,12 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature> | |
} else if let Some(numeric) = mathematics_numerical_coercion(lhs, rhs) { | ||
// Numeric arithmetic, e.g. Int32 + Int32 | ||
Ok(Signature::uniform(numeric)) | ||
} else if let Some((new_lhs, new_rhs, ret)) = resolve_ints_to_intervals(lhs, rhs) { | ||
Ok(Signature { | ||
lhs: new_lhs, | ||
rhs: new_rhs, | ||
ret, | ||
}) | ||
} else { | ||
plan_err!( | ||
"Cannot coerce arithmetic expression {lhs} {op} {rhs} to valid types" | ||
|
@@ -1449,6 +1455,22 @@ fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { | |
} | ||
} | ||
|
||
/// Resolves integer types to interval types for temporal arithmetic | ||
fn resolve_ints_to_intervals( | ||
lhs: &DataType, | ||
rhs: &DataType, | ||
) -> Option<(DataType, DataType, DataType)> { | ||
use arrow::datatypes::DataType::*; | ||
use arrow::datatypes::IntervalUnit::*; | ||
|
||
match (lhs, rhs) { | ||
// Handle integer + interval cases | ||
milevin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
(Int32 | Int64, _) => Some((Interval(DayTime), rhs.clone(), rhs.clone())), | ||
milevin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
(_, Int32 | Int64) => Some((lhs.clone(), Interval(DayTime), lhs.clone())), | ||
_ => None, | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
@@ -1607,6 +1629,18 @@ mod tests { | |
}}; | ||
} | ||
|
||
/// Test coercion rules for assymetric binary operators | ||
/// | ||
/// Applies coercion rules for `$LHS_TYPE $OP $RHS_TYPE` and asserts that the | ||
/// the result types are `$RESULT_TYPE1` and `$RESULT_TYPE2` respectively | ||
macro_rules! test_coercion_assymetric_binary_rule { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the benefit of this being a macro? Could this be ordinary function? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know -- I just copied how the rest is done here, via macros too. This macro is a clone of test_coercion_binary_rule macro. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i assumed so. But also -- macros aren't "better functions", they come with downsides, so if something is expressible as a function, it should. |
||
($LHS_TYPE:expr, $RHS_TYPE:expr, $OP:expr, $RESULT_TYPE1:expr, $RESULT_TYPE2:expr) => {{ | ||
let (lhs, rhs) = get_input_types(&$LHS_TYPE, &$OP, &$RHS_TYPE)?; | ||
assert_eq!(lhs, $RESULT_TYPE1); | ||
assert_eq!(rhs, $RESULT_TYPE2); | ||
Comment on lines
+1642
to
+1644
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The operator is symmetric, but how we cast the left and right types is not symmetric. Hence I needed an assymetric test for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could have this let (a, b) = get_input_types(&$LHS_TYPE, &$OP, &$RHS_TYPE)?;
assert_eq!(a, $RESULT_TYPE1);
assert_eq!(b, $RESULT_TYPE2);
// test symmetry
let (b, a) = get_input_types(&$RHS_TYPE, &$OP, &$LHS_TYPE)?;
assert_eq!(a, $RESULT_TYPE1);
assert_eq!(b, $RESULT_TYPE2); |
||
}}; | ||
} | ||
|
||
/// Test coercion rules for like | ||
/// | ||
/// Applies coercion rules for both | ||
|
@@ -1837,6 +1871,8 @@ mod tests { | |
|
||
#[test] | ||
fn test_type_coercion_arithmetic() -> Result<()> { | ||
use arrow::datatypes::IntervalUnit; | ||
|
||
// integer | ||
test_coercion_binary_rule!( | ||
DataType::Int32, | ||
|
@@ -1869,6 +1905,51 @@ mod tests { | |
Operator::Multiply, | ||
DataType::Float64 | ||
); | ||
|
||
// Test integer to interval coercion for temporal arithmetic | ||
test_coercion_assymetric_binary_rule!( | ||
DataType::Int32, | ||
DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
Operator::Plus, | ||
DataType::Interval(IntervalUnit::DayTime), | ||
DataType::Timestamp(TimeUnit::Nanosecond, None) | ||
); | ||
test_coercion_assymetric_binary_rule!( | ||
DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
DataType::Int64, | ||
Operator::Plus, | ||
DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
DataType::Interval(IntervalUnit::DayTime) | ||
); | ||
test_coercion_assymetric_binary_rule!( | ||
DataType::Int32, | ||
DataType::Date32, | ||
milevin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Operator::Plus, | ||
DataType::Interval(IntervalUnit::DayTime), | ||
DataType::Date32 | ||
); | ||
test_coercion_assymetric_binary_rule!( | ||
DataType::Date32, | ||
DataType::Int64, | ||
Operator::Plus, | ||
DataType::Date32, | ||
DataType::Interval(IntervalUnit::DayTime) | ||
); | ||
test_coercion_assymetric_binary_rule!( | ||
DataType::Int32, | ||
DataType::Date64, | ||
Operator::Plus, | ||
DataType::Interval(IntervalUnit::DayTime), | ||
DataType::Date64 | ||
); | ||
test_coercion_assymetric_binary_rule!( | ||
DataType::Date64, | ||
DataType::Int64, | ||
Operator::Plus, | ||
DataType::Date64, | ||
DataType::Interval(IntervalUnit::DayTime) | ||
); | ||
|
||
// TODO add other data type | ||
Ok(()) | ||
} | ||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -437,6 +437,8 @@ impl ExprSchemable for Expr { | |||
/// This function errors when it is impossible to cast the | ||||
/// expression to the target [arrow::datatypes::DataType]. | ||||
fn cast_to(self, cast_to_type: &DataType, schema: &dyn ExprSchema) -> Result<Expr> { | ||||
use datafusion_common::ScalarValue; | ||||
|
||||
let this_type = self.get_type(schema)?; | ||||
if this_type == *cast_to_type { | ||||
return Ok(self); | ||||
|
@@ -453,6 +455,26 @@ impl ExprSchemable for Expr { | |||
} | ||||
_ => Ok(Expr::Cast(Cast::new(Box::new(self), cast_to_type.clone()))), | ||||
} | ||||
} else if matches!( | ||||
(&this_type, cast_to_type), | ||||
(DataType::Int32 | DataType::Int64, DataType::Interval(_)) | ||||
) { | ||||
// Convert integer (days) to the corresponding DayTime interval | ||||
match self { | ||||
Expr::Literal(ScalarValue::Int32(Some(days))) => { | ||||
Ok(Expr::Literal(ScalarValue::IntervalDayTime(Some( | ||||
arrow_buffer::IntervalDayTime::new(days, 0), | ||||
)))) | ||||
} | ||||
Expr::Literal(ScalarValue::Int64(Some(days))) => { | ||||
Ok(Expr::Literal(ScalarValue::IntervalDayTime(Some( | ||||
arrow_buffer::IntervalDayTime::new(days as i32, 0), | ||||
)))) | ||||
} | ||||
_ => plan_err!( | ||||
"Cannot automatically convert {this_type:?} to {cast_to_type:?}" | ||||
), | ||||
} | ||||
Comment on lines
+458
to
+477
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This works for literals only and is immediate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the alternative? You can't cast an integer to an interval in arrow. You are right; it only works for literals. Do you have thoughts on how to make it work for non also? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if a Cast is supposed to work, it should work regardless how it's weaved into the plan. However, we should first decide whether integer should be castable to interval at all. It's not castable in PostgreSQL
so I am inclined it should not be castable in DF either. Which means, the Alternatively, we defer
but i'd prefer to desugar earlier and avoid separate code path in the BinaryExpr physical expr
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps we require a new scalar function, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||
} else { | ||||
plan_err!("Cannot automatically convert {this_type:?} to {cast_to_type:?}") | ||||
} | ||||
|
@@ -761,4 +783,42 @@ mod tests { | |||
Ok((self.data_type(col)?, self.nullable(col)?)) | ||||
} | ||||
} | ||||
|
||||
#[test] | ||||
fn test_cast_int_to_interval() -> Result<()> { | ||||
use arrow::datatypes::IntervalUnit; | ||||
|
||||
let schema = MockExprSchema::new().with_data_type(DataType::Int32); | ||||
|
||||
// Test casting Int32 literal to Interval | ||||
let expr = lit(ScalarValue::Int32(Some(5))); | ||||
let result = expr.cast_to(&DataType::Interval(IntervalUnit::DayTime), &schema)?; | ||||
assert_eq!( | ||||
result, | ||||
Expr::Literal(ScalarValue::IntervalDayTime(Some( | ||||
arrow_buffer::IntervalDayTime::new(5, 0) | ||||
))) | ||||
); | ||||
|
||||
// Test casting Int64 literal to Interval | ||||
let expr = lit(ScalarValue::Int64(Some(7))); | ||||
let result = expr.cast_to(&DataType::Interval(IntervalUnit::DayTime), &schema)?; | ||||
assert_eq!( | ||||
result, | ||||
Expr::Literal(ScalarValue::IntervalDayTime(Some( | ||||
arrow_buffer::IntervalDayTime::new(7, 0) | ||||
))) | ||||
); | ||||
|
||||
// Test that non-literal expressions cannot be cast from int to interval | ||||
let expr = col("foo") + lit(1); | ||||
let err = expr | ||||
.cast_to(&DataType::Interval(IntervalUnit::DayTime), &schema) | ||||
.unwrap_err(); | ||||
assert!(err | ||||
.to_string() | ||||
.contains("Cannot automatically convert Int32 to Interval(DayTime)")); | ||||
|
||||
Ok(()) | ||||
} | ||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we're in
Plus | Minus | Multiply | Divide | Modulo => {
branchbut the new logic should apply
+
symmetricallydate - int
, but likely not toint - date
(at least per PostgreSQL)so what about passing the operator to the new function and doing the work for
+
and-
(so that code already identifies asymmetric treatment)