Skip to content

Commit

Permalink
feat!: Update date_range to no longer produce datetime ranges (#16734)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Jun 5, 2024
1 parent e75ece9 commit 5d0e339
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 437 deletions.
126 changes: 26 additions & 100 deletions crates/polars-plan/src/dsl/function_expr/range/date_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,78 +2,66 @@ use polars_core::prelude::*;
use polars_core::utils::arrow::temporal_conversions::MILLISECONDS_IN_DAY;
use polars_time::{datetime_range_impl, ClosedWindow, Duration};

use super::datetime_range::{datetime_range, datetime_ranges};
use super::utils::{
ensure_range_bounds_contain_exactly_one_value, temporal_ranges_impl_broadcast,
temporal_series_to_i64_scalar,
};
use crate::dsl::function_expr::FieldsMapper;

const CAPACITY_FACTOR: usize = 5;

pub(super) fn temporal_range(
pub(super) fn date_range(
s: &[Series],
interval: Duration,
closed: ClosedWindow,
time_unit: Option<TimeUnit>,
time_zone: Option<TimeZone>,
) -> PolarsResult<Series> {
if s[0].dtype() == &DataType::Date && interval.is_full_days() {
date_range(s, interval, closed)
} else {
datetime_range(s, interval, closed, time_unit, time_zone)
}
}

/// Build a column of ranges, dispatching on the dtype of the first input.
///
/// When `s[0]` has dtype `Date` and `interval` consists of full days, the
/// work is delegated to `date_ranges`. Every other combination is forwarded
/// to `datetime_ranges` together with `time_unit` and `time_zone`.
///
/// # Panics
///
/// Indexes `s[0]` directly, so an empty slice panics.
pub(super) fn temporal_ranges(
    s: &[Series],
    interval: Duration,
    closed: ClosedWindow,
    time_unit: Option<TimeUnit>,
    time_zone: Option<TimeZone>,
) -> PolarsResult<Series> {
    let is_date_input = s[0].dtype() == &DataType::Date;
    // Anything that is not "Date input + whole-day interval" takes the datetime path.
    if !(is_date_input && interval.is_full_days()) {
        return datetime_ranges(s, interval, closed, time_unit, time_zone);
    }
    date_ranges(s, interval, closed)
}

fn date_range(s: &[Series], interval: Duration, closed: ClosedWindow) -> PolarsResult<Series> {
let start = &s[0];
let end = &s[1];
let name = start.name();

ensure_range_bounds_contain_exactly_one_value(start, end)?;
let start = start.strict_cast(&DataType::Date)?;
let end = end.strict_cast(&DataType::Date)?;
polars_ensure!(
interval.is_full_days(),
ComputeError: "`interval` input for `date_range` must consist of full days, got: {interval}"
);

let dtype = DataType::Date;
let start = temporal_series_to_i64_scalar(start)
let name = start.name();
let start = temporal_series_to_i64_scalar(&start)
.ok_or_else(|| polars_err!(ComputeError: "start is an out-of-range time."))?
* MILLISECONDS_IN_DAY;
let end = temporal_series_to_i64_scalar(end)
let end = temporal_series_to_i64_scalar(&end)
.ok_or_else(|| polars_err!(ComputeError: "end is an out-of-range time."))?
* MILLISECONDS_IN_DAY;

let result = datetime_range_impl(
let out = datetime_range_impl(
name,
start,
end,
interval,
closed,
TimeUnit::Milliseconds,
None,
)?
.cast(&dtype)?;
)?;

Ok(result.into_series())
let to_type = DataType::Date;
out.cast(&to_type)
}

fn date_ranges(s: &[Series], interval: Duration, closed: ClosedWindow) -> PolarsResult<Series> {
pub(super) fn date_ranges(
s: &[Series],
interval: Duration,
closed: ClosedWindow,
) -> PolarsResult<Series> {
let start = &s[0];
let end = &s[1];

let start = start.cast(&DataType::Int64)?;
let end = end.cast(&DataType::Int64)?;
polars_ensure!(
interval.is_full_days(),
ComputeError: "`interval` input for `date_ranges` must consist of full days, got: {interval}"
);

let start = start.strict_cast(&DataType::Date)?.cast(&DataType::Int64)?;
let end = end.strict_cast(&DataType::Date)?.cast(&DataType::Int64)?;

let start = start.i64().unwrap() * MILLISECONDS_IN_DAY;
let end = end.i64().unwrap() * MILLISECONDS_IN_DAY;
Expand Down Expand Up @@ -107,65 +95,3 @@ fn date_ranges(s: &[Series], interval: Duration, closed: ClosedWindow) -> Polars
let to_type = DataType::List(Box::new(DataType::Date));
out.cast(&to_type)
}

impl<'a> FieldsMapper<'a> {
    /// Resolve the output dtype of a date range from the supertype of the inputs.
    ///
    /// A `Date` supertype is resolved by `map_date_to_date_range_dtype`, a
    /// `Datetime` supertype by `map_datetime_to_date_range_dtype`.
    ///
    /// # Errors
    ///
    /// Propagates errors from `map_to_supertype` and from the datetime mapping,
    /// and bails with a `ComputeError` when the supertype is neither `Date`
    /// nor `Datetime`.
    pub(super) fn map_to_date_range_dtype(
        &self,
        interval: &Duration,
        time_unit: Option<&TimeUnit>,
        time_zone: Option<&str>,
    ) -> PolarsResult<DataType> {
        let supertype = self.map_to_supertype()?.dtype;
        match supertype {
            DataType::Date => Ok(map_date_to_date_range_dtype(
                interval, time_unit, time_zone,
            )),
            DataType::Datetime(data_unit, data_zone) => {
                map_datetime_to_date_range_dtype(data_unit, data_zone, time_unit, time_zone)
            },
            _ => polars_bail!(ComputeError: "expected Date or Datetime, got {}", supertype),
        }
    }
}

/// Combine the time unit / time zone carried by the data with the ones the
/// caller supplied, producing the resulting `Datetime` dtype.
///
/// * Time zone: if both the data and the caller provide one they must be
///   equal; otherwise whichever one is present is used, or `None` if neither.
/// * Time unit: the caller-supplied unit wins, falling back to the data's unit.
///
/// # Errors
///
/// Returns a `ComputeError` when both time zones are present and differ.
fn map_datetime_to_date_range_dtype(
    data_time_unit: TimeUnit,
    data_time_zone: Option<String>,
    given_time_unit: Option<&TimeUnit>,
    given_time_zone: Option<&str>,
) -> PolarsResult<DataType> {
    let resolved_zone = match (data_time_zone, given_time_zone) {
        (Some(data_tz), Some(given_tz)) => {
            polars_ensure!(
                data_tz == given_tz,
                ComputeError: format!(
                    "`time_zone` does not match the data\
                    \n\nData has time zone '{}', got '{}'.", data_tz, given_tz)
            );
            Some(data_tz)
        },
        (Some(data_tz), None) => Some(data_tz),
        (None, Some(given_tz)) => Some(given_tz.to_string()),
        (None, None) => None,
    };
    let resolved_unit = given_time_unit.copied().unwrap_or(data_time_unit);

    Ok(DataType::Datetime(resolved_unit, resolved_zone))
}
/// Pick the output dtype for a range whose inputs are of dtype `Date`.
///
/// A whole-day `interval` keeps the result as `Date`. Otherwise the result is
/// a `Datetime` carrying `time_zone`, whose unit is the explicit `time_unit`
/// when given, else nanoseconds if `interval.nanoseconds()` is not a multiple
/// of 1000, else microseconds.
fn map_date_to_date_range_dtype(
    interval: &Duration,
    time_unit: Option<&TimeUnit>,
    time_zone: Option<&str>,
) -> DataType {
    if interval.is_full_days() {
        return DataType::Date;
    }

    let unit = match time_unit {
        Some(tu) => *tu,
        // No explicit unit: only fall back to nanoseconds when the interval needs them.
        None if interval.nanoseconds() % 1000 != 0 => TimeUnit::Nanoseconds,
        None => TimeUnit::Microseconds,
    };
    DataType::Datetime(unit, time_zone.map(String::from))
}
84 changes: 16 additions & 68 deletions crates/polars-plan/src/dsl/function_expr/range/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[cfg(feature = "temporal")]
#[cfg(feature = "dtype-date")]
mod date_range;
#[cfg(feature = "dtype-datetime")]
mod datetime_range;
Expand Down Expand Up @@ -28,19 +28,15 @@ pub enum RangeFunction {
dtype: DataType,
},
IntRanges,
#[cfg(feature = "temporal")]
#[cfg(feature = "dtype-date")]
DateRange {
interval: Duration,
closed: ClosedWindow,
time_unit: Option<TimeUnit>,
time_zone: Option<TimeZone>,
},
#[cfg(feature = "temporal")]
#[cfg(feature = "dtype-date")]
DateRanges {
interval: Duration,
closed: ClosedWindow,
time_unit: Option<TimeUnit>,
time_zone: Option<TimeZone>,
},
#[cfg(feature = "dtype-datetime")]
DatetimeRange {
Expand Down Expand Up @@ -74,37 +70,11 @@ impl RangeFunction {
match self {
IntRange { dtype, .. } => mapper.with_dtype(dtype.clone()),
IntRanges => mapper.with_dtype(DataType::List(Box::new(DataType::Int64))),
#[cfg(feature = "temporal")]
DateRange {
interval,
closed: _,
time_unit,
time_zone,
} => {
// output dtype may change based on `interval`, `time_unit`, and `time_zone`
let dtype = mapper.map_to_date_range_dtype(
interval,
time_unit.as_ref(),
time_zone.as_deref(),
)?;
mapper.with_dtype(dtype)
},
#[cfg(feature = "temporal")]
DateRanges {
interval,
closed: _,
time_unit,
time_zone,
} => {
// output dtype may change based on `interval`, `time_unit`, and `time_zone`
let inner_dtype = mapper.map_to_date_range_dtype(
interval,
time_unit.as_ref(),
time_zone.as_deref(),
)?;
mapper.with_dtype(DataType::List(Box::new(inner_dtype)))
},
#[cfg(feature = "temporal")]
#[cfg(feature = "dtype-date")]
DateRange { .. } => mapper.with_dtype(DataType::Date),
#[cfg(feature = "dtype-date")]
DateRanges { .. } => mapper.with_dtype(DataType::List(Box::new(DataType::Date))),
#[cfg(feature = "dtype-datetime")]
DatetimeRange {
interval: _,
closed: _,
Expand All @@ -116,7 +86,7 @@ impl RangeFunction {
mapper.map_to_datetime_range_dtype(time_unit.as_ref(), time_zone.as_deref())?;
mapper.with_dtype(dtype)
},
#[cfg(feature = "temporal")]
#[cfg(feature = "dtype-datetime")]
DatetimeRanges {
interval: _,
closed: _,
Expand All @@ -142,7 +112,7 @@ impl Display for RangeFunction {
let s = match self {
IntRange { .. } => "int_range",
IntRanges => "int_ranges",
#[cfg(feature = "temporal")]
#[cfg(feature = "dtype-date")]
DateRange { .. } => "date_range",
#[cfg(feature = "temporal")]
DateRanges { .. } => "date_ranges",
Expand All @@ -169,35 +139,13 @@ impl From<RangeFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
IntRanges => {
map_as_slice!(int_range::int_ranges)
},
#[cfg(feature = "temporal")]
DateRange {
interval,
closed,
time_unit,
time_zone,
} => {
map_as_slice!(
date_range::temporal_range,
interval,
closed,
time_unit,
time_zone.clone()
)
#[cfg(feature = "dtype-date")]
DateRange { interval, closed } => {
map_as_slice!(date_range::date_range, interval, closed)
},
#[cfg(feature = "temporal")]
DateRanges {
interval,
closed,
time_unit,
time_zone,
} => {
map_as_slice!(
date_range::temporal_ranges,
interval,
closed,
time_unit,
time_zone.clone()
)
#[cfg(feature = "dtype-date")]
DateRanges { interval, closed } => {
map_as_slice!(date_range::date_ranges, interval, closed)
},
#[cfg(feature = "dtype-datetime")]
DatetimeRange {
Expand Down
34 changes: 4 additions & 30 deletions crates/polars-plan/src/dsl/functions/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,27 +37,14 @@ pub fn int_ranges(start: Expr, end: Expr, step: Expr) -> Expr {

/// Create a date range from a `start` and `end` expression.
#[cfg(feature = "temporal")]
pub fn date_range(
start: Expr,
end: Expr,
interval: Duration,
closed: ClosedWindow,
time_unit: Option<TimeUnit>,
time_zone: Option<TimeZone>,
) -> Expr {
pub fn date_range(start: Expr, end: Expr, interval: Duration, closed: ClosedWindow) -> Expr {
let input = vec![start, end];

Expr::Function {
input,
function: FunctionExpr::Range(RangeFunction::DateRange {
interval,
closed,
time_unit,
time_zone,
}),
function: FunctionExpr::Range(RangeFunction::DateRange { interval, closed }),
options: FunctionOptions {
collect_groups: ApplyOptions::GroupWise,
cast_to_supertypes: true,
allow_rename: true,
..Default::default()
},
Expand All @@ -66,27 +53,14 @@ pub fn date_range(

/// Create a column of date ranges from a `start` and `end` expression.
#[cfg(feature = "temporal")]
pub fn date_ranges(
start: Expr,
end: Expr,
interval: Duration,
closed: ClosedWindow,
time_unit: Option<TimeUnit>,
time_zone: Option<TimeZone>,
) -> Expr {
pub fn date_ranges(start: Expr, end: Expr, interval: Duration, closed: ClosedWindow) -> Expr {
let input = vec![start, end];

Expr::Function {
input,
function: FunctionExpr::Range(RangeFunction::DateRanges {
interval,
closed,
time_unit,
time_zone,
}),
function: FunctionExpr::Range(RangeFunction::DateRanges { interval, closed }),
options: FunctionOptions {
collect_groups: ApplyOptions::GroupWise,
cast_to_supertypes: true,
allow_rename: true,
..Default::default()
},
Expand Down
Loading

0 comments on commit 5d0e339

Please sign in to comment.