From c126b1bf38c57c38b4345fde61851f5a576bfe6f Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 18 Jun 2024 15:15:15 +0300 Subject: [PATCH 1/2] feat: timestamp subtract date32 support --- .../src/expressions/binary_distinct.rs | 58 ++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/binary_distinct.rs b/datafusion/physical-expr/src/expressions/binary_distinct.rs index 41db971b39e5..a283bf1c1447 100644 --- a/datafusion/physical-expr/src/expressions/binary_distinct.rs +++ b/datafusion/physical-expr/src/expressions/binary_distinct.rs @@ -19,14 +19,13 @@ use std::sync::Arc; use arrow::{ array::{ - Array, ArrayRef, Int64Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, - IntervalYearMonthArray, TimestampNanosecondArray, + Array, ArrayRef, Date32Array, Int64Array, IntervalDayTimeArray, + IntervalMonthDayNanoArray, IntervalYearMonthArray, TimestampNanosecondArray, }, datatypes::{DataType, IntervalUnit}, - temporal_conversions::timestamp_ns_to_datetime, + temporal_conversions::{date32_to_datetime, timestamp_ns_to_datetime}, }; use chrono::{Datelike, Days, Duration, Months, NaiveDate, NaiveDateTime}; - use datafusion_common::{DataFusionError, Result}; use datafusion_expr::Operator; @@ -49,6 +48,7 @@ pub fn distinct_types_allowed( (left_type, right_type), (Timestamp(Nanosecond, _), Interval(_)) | (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) + | (Timestamp(Nanosecond, _), Date32) ), Operator::Multiply => matches!( (left_type, right_type), @@ -105,6 +105,9 @@ pub fn coerce_types_distinct( Timestamp(Nanosecond, tz.clone()), Timestamp(Nanosecond, tz2.clone()), )), + (Timestamp(_, tz), Date32) => { + Some((Timestamp(Nanosecond, tz.clone()), Date32)) + } _ => None, }, Operator::Multiply => match (lhs_type, rhs_type) { @@ -175,6 +178,9 @@ pub fn evaluate_distinct_with_resolved_args( // TODO: Implement postgres behavior with time zones Some(timestamp_subtract_timestamp(left, right)) } + (Timestamp(Nanosecond, None), Date32) => { + Some(timestamp_subtract_date(left, right)) + } _ => None, }, Operator::Multiply => match (left_data_type, right_data_type) { @@ -364,6 +370,24 @@ fn timestamp_subtract_timestamp( Ok(Arc::new(result)) } +fn timestamp_subtract_date( + left: Arc, + right: Arc, +) -> Result { + let left = left + .as_any() + .downcast_ref::() + .unwrap(); + let right = right.as_any().downcast_ref::().unwrap(); + + let result = left + .iter() + .zip(right.iter()) + .map(|(t_l, t_r)| scalar_timestamp_subtract_date(t_l, t_r)) + .collect::>()?; + Ok(Arc::new(result)) +} + fn scalar_timestamp_add_interval_year_month( timestamp: Option, interval: Option, @@ -487,7 +511,29 @@ fn scalar_timestamp_subtract_timestamp( let datetime_right: NaiveDateTime = timestamp_ns_to_datetime(timestamp_right.unwrap()); let duration = datetime_left.signed_duration_since(datetime_right); - // TODO: What is Postgres behavior? E.g. if these timestamp values are i64::MAX and i64::MIN, + + duration_to_interval_day_nano(duration) + + // TODO: How can day, above, in scalar_timestamp_add_interval_month_day_nano, be negative? +} + +fn scalar_timestamp_subtract_date( + timestamp_left: Option, + timestamp_right: Option, +) -> Result> { + if timestamp_left.is_none() || timestamp_right.is_none() { + return Ok(None); + } + + let datetime_left: NaiveDateTime = timestamp_ns_to_datetime(timestamp_left.unwrap()); + let datetime_right: NaiveDateTime = date32_to_datetime(timestamp_right.unwrap()); + let duration = datetime_left.signed_duration_since(datetime_right); + + duration_to_interval_day_nano(duration) +} + +fn duration_to_interval_day_nano(duration: Duration) -> Result> { + // TODO: What is Postgres behavior? E.g. if these timestamp values are i64::MIN and i32/i64::MAX, // we needlessly have a range error. let nanos: i64 = duration.num_nanoseconds().ok_or_else(|| { DataFusionError::Execution("Interval value is out of range".to_string()) @@ -499,8 +545,6 @@ fn scalar_timestamp_subtract_timestamp( (((days as i128) & 0xFFFF_FFFF) << 64) | ((nanos_rem as i128) & 0xFFFF_FFFF_FFFF_FFFF), )) - - // TODO: How can day, above, in scalar_timestamp_add_interval_month_day_nano, be negative? } fn change_ym(t: NaiveDateTime, y: i32, m: u32) -> Result { From 87eea7d446021551f0956220b091911e473a7bea Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 18 Jun 2024 19:10:43 +0300 Subject: [PATCH 2/2] chore: freeing CI build disk space for tests --- .github/workflows/rust.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 2e6bf2e84ef0..c37ff81d20e4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -376,6 +376,18 @@ jobs: rustup toolchain install ${{ matrix.rust }} rustup default ${{ matrix.rust }} rustup component add rustfmt + - name: Maximize build space (disk space limitations) + run: | + echo "Disk Space before cleanup" + df -h + du -h --max-depth 1 / 2>/dev/null || true + du -h --max-depth 1 /__t 2>/dev/null || true + du -h --max-depth 1 /__e 2>/dev/null || true + du -h --max-depth 1 /__w 2>/dev/null || true + + rm -rf /__t/CodeQL + echo "Disk Space after cleanup" + df -h - name: Run tests run: | export ARROW_TEST_DATA=$(pwd)/testing/data