From 5d0e339434a28b868936ff7a93ddaf69ab75870c Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 5 Jun 2024 19:35:42 +0200 Subject: [PATCH] feat!: Update `date_range` to no longer produce datetime ranges (#16734) --- .../src/dsl/function_expr/range/date_range.rs | 126 ++++----------- .../src/dsl/function_expr/range/mod.rs | 84 ++-------- crates/polars-plan/src/dsl/functions/range.rs | 34 +--- .../polars/functions/range/date_range.py | 145 ++++-------------- .../polars/functions/range/datetime_range.py | 10 ++ py-polars/src/functions/range.rs | 18 +-- .../unit/functions/range/test_date_range.py | 142 ++++------------- 7 files changed, 122 insertions(+), 437 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/range/date_range.rs b/crates/polars-plan/src/dsl/function_expr/range/date_range.rs index 1dd96e3f6af4..bef4946e5729 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/date_range.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/date_range.rs @@ -2,59 +2,38 @@ use polars_core::prelude::*; use polars_core::utils::arrow::temporal_conversions::MILLISECONDS_IN_DAY; use polars_time::{datetime_range_impl, ClosedWindow, Duration}; -use super::datetime_range::{datetime_range, datetime_ranges}; use super::utils::{ ensure_range_bounds_contain_exactly_one_value, temporal_ranges_impl_broadcast, temporal_series_to_i64_scalar, }; -use crate::dsl::function_expr::FieldsMapper; const CAPACITY_FACTOR: usize = 5; -pub(super) fn temporal_range( +pub(super) fn date_range( s: &[Series], interval: Duration, closed: ClosedWindow, - time_unit: Option, - time_zone: Option, ) -> PolarsResult { - if s[0].dtype() == &DataType::Date && interval.is_full_days() { - date_range(s, interval, closed) - } else { - datetime_range(s, interval, closed, time_unit, time_zone) - } -} - -pub(super) fn temporal_ranges( - s: &[Series], - interval: Duration, - closed: ClosedWindow, - time_unit: Option, - time_zone: Option, -) -> PolarsResult { - if s[0].dtype() == &DataType::Date && interval.is_full_days() { - date_ranges(s, interval, closed) - } else { - datetime_ranges(s, interval, closed, time_unit, time_zone) - } -} - -fn date_range(s: &[Series], interval: Duration, closed: ClosedWindow) -> PolarsResult { let start = &s[0]; let end = &s[1]; - let name = start.name(); ensure_range_bounds_contain_exactly_one_value(start, end)?; + let start = start.strict_cast(&DataType::Date)?; + let end = end.strict_cast(&DataType::Date)?; + polars_ensure!( + interval.is_full_days(), + ComputeError: "`interval` input for `date_range` must consist of full days, got: {interval}" + ); - let dtype = DataType::Date; - let start = temporal_series_to_i64_scalar(start) + let name = start.name(); + let start = temporal_series_to_i64_scalar(&start) .ok_or_else(|| polars_err!(ComputeError: "start is an out-of-range time."))? * MILLISECONDS_IN_DAY; - let end = temporal_series_to_i64_scalar(end) + let end = temporal_series_to_i64_scalar(&end) .ok_or_else(|| polars_err!(ComputeError: "end is an out-of-range time."))? * MILLISECONDS_IN_DAY; - let result = datetime_range_impl( + let out = datetime_range_impl( name, start, end, @@ -62,18 +41,27 @@ fn date_range(s: &[Series], interval: Duration, closed: ClosedWindow) -> PolarsR closed, TimeUnit::Milliseconds, None, - )? - .cast(&dtype)?; + )?; - Ok(result.into_series()) + let to_type = DataType::Date; + out.cast(&to_type) } -fn date_ranges(s: &[Series], interval: Duration, closed: ClosedWindow) -> PolarsResult { +pub(super) fn date_ranges( + s: &[Series], + interval: Duration, + closed: ClosedWindow, +) -> PolarsResult { let start = &s[0]; let end = &s[1]; - let start = start.cast(&DataType::Int64)?; - let end = end.cast(&DataType::Int64)?; + polars_ensure!( + interval.is_full_days(), + ComputeError: "`interval` input for `date_ranges` must consist of full days, got: {interval}" + ); + + let start = start.strict_cast(&DataType::Date)?.cast(&DataType::Int64)?; + let end = end.strict_cast(&DataType::Date)?.cast(&DataType::Int64)?; let start = start.i64().unwrap() * MILLISECONDS_IN_DAY; let end = end.i64().unwrap() * MILLISECONDS_IN_DAY; @@ -107,65 +95,3 @@ fn date_ranges(s: &[Series], interval: Duration, closed: ClosedWindow) -> Polars let to_type = DataType::List(Box::new(DataType::Date)); out.cast(&to_type) } - -impl<'a> FieldsMapper<'a> { - pub(super) fn map_to_date_range_dtype( - &self, - interval: &Duration, - time_unit: Option<&TimeUnit>, - time_zone: Option<&str>, - ) -> PolarsResult { - let data_dtype = self.map_to_supertype()?.dtype; - match data_dtype { - DataType::Datetime(tu, tz) => { - map_datetime_to_date_range_dtype(tu, tz, time_unit, time_zone) - }, - DataType::Date => { - let schema_dtype = map_date_to_date_range_dtype(interval, time_unit, time_zone); - Ok(schema_dtype) - }, - _ => polars_bail!(ComputeError: "expected Date or Datetime, got {}", data_dtype), - } - } -} - -fn map_datetime_to_date_range_dtype( - data_time_unit: TimeUnit, - data_time_zone: Option, - given_time_unit: Option<&TimeUnit>, - given_time_zone: Option<&str>, -) -> PolarsResult { - let schema_time_zone = match (data_time_zone, given_time_zone) { - (Some(data_tz), Some(given_tz)) => { - polars_ensure!( - data_tz == given_tz, - ComputeError: format!( - "`time_zone` does not match the data\ - \n\nData has time zone '{}', got '{}'.", data_tz, given_tz) - ); - Some(data_tz) - }, - (_, Some(given_tz)) => Some(given_tz.to_string()), - (Some(data_tz), None) => Some(data_tz), - (_, _) => None, - }; - let schema_time_unit = given_time_unit.unwrap_or(&data_time_unit); - - let schema_dtype = DataType::Datetime(*schema_time_unit, schema_time_zone); - Ok(schema_dtype) -} -fn map_date_to_date_range_dtype( - interval: &Duration, - time_unit: Option<&TimeUnit>, - time_zone: Option<&str>, -) -> DataType { - if interval.is_full_days() { - DataType::Date - } else if let Some(tu) = time_unit { - DataType::Datetime(*tu, time_zone.map(String::from)) - } else if interval.nanoseconds() % 1000 != 0 { - DataType::Datetime(TimeUnit::Nanoseconds, time_zone.map(String::from)) - } else { - DataType::Datetime(TimeUnit::Microseconds, time_zone.map(String::from)) - } -} diff --git a/crates/polars-plan/src/dsl/function_expr/range/mod.rs b/crates/polars-plan/src/dsl/function_expr/range/mod.rs index dfee18e7e7cc..b13d45bdd73c 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/mod.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "temporal")] +#[cfg(feature = "dtype-date")] mod date_range; #[cfg(feature = "dtype-datetime")] mod datetime_range; @@ -28,19 +28,15 @@ pub enum RangeFunction { dtype: DataType, }, IntRanges, - #[cfg(feature = "temporal")] + #[cfg(feature = "dtype-date")] DateRange { interval: Duration, closed: ClosedWindow, - time_unit: Option, - time_zone: Option, }, - #[cfg(feature = "temporal")] + #[cfg(feature = "dtype-date")] DateRanges { interval: Duration, closed: ClosedWindow, - time_unit: Option, - time_zone: Option, }, #[cfg(feature = "dtype-datetime")] DatetimeRange { @@ -74,37 +70,11 @@ impl RangeFunction { match self { IntRange { dtype, .. } => mapper.with_dtype(dtype.clone()), IntRanges => mapper.with_dtype(DataType::List(Box::new(DataType::Int64))), - #[cfg(feature = "temporal")] - DateRange { - interval, - closed: _, - time_unit, - time_zone, - } => { - // output dtype may change based on `interval`, `time_unit`, and `time_zone` - let dtype = mapper.map_to_date_range_dtype( - interval, - time_unit.as_ref(), - time_zone.as_deref(), - )?; - mapper.with_dtype(dtype) - }, - #[cfg(feature = "temporal")] - DateRanges { - interval, - closed: _, - time_unit, - time_zone, - } => { - // output dtype may change based on `interval`, `time_unit`, and `time_zone` - let inner_dtype = mapper.map_to_date_range_dtype( - interval, - time_unit.as_ref(), - time_zone.as_deref(), - )?; - mapper.with_dtype(DataType::List(Box::new(inner_dtype))) - }, - #[cfg(feature = "temporal")] + #[cfg(feature = "dtype-date")] + DateRange { .. } => mapper.with_dtype(DataType::Date), + #[cfg(feature = "dtype-date")] + DateRanges { .. } => mapper.with_dtype(DataType::List(Box::new(DataType::Date))), + #[cfg(feature = "dtype-datetime")] DatetimeRange { interval: _, closed: _, @@ -116,7 +86,7 @@ impl RangeFunction { mapper.map_to_datetime_range_dtype(time_unit.as_ref(), time_zone.as_deref())?; mapper.with_dtype(dtype) }, - #[cfg(feature = "temporal")] + #[cfg(feature = "dtype-datetime")] DatetimeRanges { interval: _, closed: _, @@ -142,7 +112,7 @@ impl Display for RangeFunction { let s = match self { IntRange { .. } => "int_range", IntRanges => "int_ranges", - #[cfg(feature = "temporal")] + #[cfg(feature = "dtype-date")] DateRange { .. } => "date_range", #[cfg(feature = "temporal")] DateRanges { .. } => "date_ranges", @@ -169,35 +139,13 @@ impl From for SpecialEq> { IntRanges => { map_as_slice!(int_range::int_ranges) }, - #[cfg(feature = "temporal")] - DateRange { - interval, - closed, - time_unit, - time_zone, - } => { - map_as_slice!( - date_range::temporal_range, - interval, - closed, - time_unit, - time_zone.clone() - ) + #[cfg(feature = "dtype-date")] + DateRange { interval, closed } => { + map_as_slice!(date_range::date_range, interval, closed) }, - #[cfg(feature = "temporal")] - DateRanges { - interval, - closed, - time_unit, - time_zone, - } => { - map_as_slice!( - date_range::temporal_ranges, - interval, - closed, - time_unit, - time_zone.clone() - ) + #[cfg(feature = "dtype-date")] + DateRanges { interval, closed } => { + map_as_slice!(date_range::date_ranges, interval, closed) }, #[cfg(feature = "dtype-datetime")] DatetimeRange { diff --git a/crates/polars-plan/src/dsl/functions/range.rs b/crates/polars-plan/src/dsl/functions/range.rs index 6b6fa1af73fb..be1c851fdbcb 100644 --- a/crates/polars-plan/src/dsl/functions/range.rs +++ b/crates/polars-plan/src/dsl/functions/range.rs @@ -37,27 +37,14 @@ pub fn int_ranges(start: Expr, end: Expr, step: Expr) -> Expr { /// Create a date range from a `start` and `stop` expression. #[cfg(feature = "temporal")] -pub fn date_range( - start: Expr, - end: Expr, - interval: Duration, - closed: ClosedWindow, - time_unit: Option, - time_zone: Option, -) -> Expr { +pub fn date_range(start: Expr, end: Expr, interval: Duration, closed: ClosedWindow) -> Expr { let input = vec![start, end]; Expr::Function { input, - function: FunctionExpr::Range(RangeFunction::DateRange { - interval, - closed, - time_unit, - time_zone, - }), + function: FunctionExpr::Range(RangeFunction::DateRange { interval, closed }), options: FunctionOptions { collect_groups: ApplyOptions::GroupWise, - cast_to_supertypes: true, allow_rename: true, ..Default::default() }, @@ -66,27 +53,14 @@ pub fn date_range( /// Create a column of date ranges from a `start` and `stop` expression. #[cfg(feature = "temporal")] -pub fn date_ranges( - start: Expr, - end: Expr, - interval: Duration, - closed: ClosedWindow, - time_unit: Option, - time_zone: Option, -) -> Expr { +pub fn date_ranges(start: Expr, end: Expr, interval: Duration, closed: ClosedWindow) -> Expr { let input = vec![start, end]; Expr::Function { input, - function: FunctionExpr::Range(RangeFunction::DateRanges { - interval, - closed, - time_unit, - time_zone, - }), + function: FunctionExpr::Range(RangeFunction::DateRanges { interval, closed }), options: FunctionOptions { collect_groups: ApplyOptions::GroupWise, - cast_to_supertypes: true, allow_rename: true, ..Default::default() }, diff --git a/py-polars/polars/functions/range/date_range.py b/py-polars/polars/functions/range/date_range.py index aa4f882625ce..9ead55f60efa 100644 --- a/py-polars/polars/functions/range/date_range.py +++ b/py-polars/polars/functions/range/date_range.py @@ -1,13 +1,9 @@ from __future__ import annotations import contextlib -from datetime import datetime from typing import TYPE_CHECKING, overload from polars import functions as F -from polars._utils.deprecation import ( - issue_deprecation_warning, -) from polars._utils.parse_expr_input import parse_as_expression from polars._utils.wrap import wrap_expr from polars.functions.range._utils import parse_interval_argument @@ -16,11 +12,11 @@ import polars.polars as plr if TYPE_CHECKING: - from datetime import date, timedelta + from datetime import date, datetime, timedelta from typing import Literal from polars import Expr, Series - from polars.type_aliases import ClosedInterval, IntoExprColumn, TimeUnit + from polars.type_aliases import ClosedInterval, IntoExprColumn @overload @@ -30,8 +26,6 @@ def date_range( interval: str | timedelta = ..., *, closed: ClosedInterval = ..., - time_unit: TimeUnit | None = ..., - time_zone: str | None = ..., eager: Literal[False] = ..., ) -> Expr: ... @@ -43,8 +37,6 @@ def date_range( interval: str | timedelta = ..., *, closed: ClosedInterval = ..., - time_unit: TimeUnit | None = ..., - time_zone: str | None = ..., eager: Literal[True], ) -> Series: ... @@ -56,8 +48,6 @@ def date_range( interval: str | timedelta = ..., *, closed: ClosedInterval = ..., - time_unit: TimeUnit | None = ..., - time_zone: str | None = ..., eager: bool, ) -> Series | Expr: ... @@ -68,8 +58,6 @@ def date_range( interval: str | timedelta = "1d", *, closed: ClosedInterval = "both", - time_unit: TimeUnit | None = None, - time_zone: str | None = None, eager: bool = False, ) -> Series | Expr: """ @@ -84,14 +72,9 @@ def date_range( interval Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). + Must consist of full days. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). - time_unit : {None, 'ns', 'us', 'ms'} - Time unit of the resulting `Datetime` data type. - Only takes effect if the output column is of type `Datetime`. - time_zone - Time zone of the resulting `Datetime` data type. - Only takes effect if the output column is of type `Datetime`. eager Evaluate immediately and return a `Series`. If set to `False` (default), return an expression instead. @@ -99,38 +82,29 @@ def date_range( Returns ------- Expr or Series - Column of data type :class:`Date` or :class:`Datetime`. + Column of data type :class:`Date`. + + See Also + -------- + date_ranges + datetime_range Notes ----- - 1) If both `start` and `end` are passed as date types (not datetime), and the - interval granularity is no finer than 1d, the returned range is also of - type date. All other permutations return a datetime Series. - - .. deprecated:: 0.19.3 - In a future version of Polars, `date_range` will always return a `Date`. - Please use :func:`datetime_range` if you want a `Datetime` instead. - - 2) `interval` is created according to the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - Or combine them: - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds - - By "calendar day", we mean the corresponding time on the next day (which may - not be 24 hours, due to daylight savings). Similarly for "calendar week", - "calendar month", "calendar quarter", and "calendar year". + `interval` is created according to the following string language: + + - 1d (1 calendar day) + - 1w (1 calendar week) + - 1mo (1 calendar month) + - 1q (1 calendar quarter) + - 1y (1 calendar year) + + Or combine them: + "1w2d" # 1 week, 2 days + + By "calendar day", we mean the corresponding time on the next day (which may + not be 24 hours, due to daylight savings). Similarly for "calendar week", + "calendar month", "calendar quarter", and "calendar year". Examples -------- @@ -168,16 +142,10 @@ def date_range( ] """ interval = parse_interval_argument(interval) - if time_unit is None and "ns" in interval: - time_unit = "ns" - - _warn_for_deprecated_date_range_use(start, end, interval, time_unit, time_zone) start_pyexpr = parse_as_expression(start) end_pyexpr = parse_as_expression(end) - result = wrap_expr( - plr.date_range(start_pyexpr, end_pyexpr, interval, closed, time_unit, time_zone) - ) + result = wrap_expr(plr.date_range(start_pyexpr, end_pyexpr, interval, closed)) if eager: return F.select(result).to_series() @@ -192,8 +160,6 @@ def date_ranges( interval: str | timedelta = ..., *, closed: ClosedInterval = ..., - time_unit: TimeUnit | None = ..., - time_zone: str | None = ..., eager: Literal[False] = ..., ) -> Expr: ... @@ -205,8 +171,6 @@ def date_ranges( interval: str | timedelta = ..., *, closed: ClosedInterval = ..., - time_unit: TimeUnit | None = ..., - time_zone: str | None = ..., eager: Literal[True], ) -> Series: ... @@ -218,8 +182,6 @@ def date_ranges( interval: str | timedelta = ..., *, closed: ClosedInterval = ..., - time_unit: TimeUnit | None = ..., - time_zone: str | None = ..., eager: bool, ) -> Series | Expr: ... @@ -230,8 +192,6 @@ def date_ranges( interval: str | timedelta = "1d", *, closed: ClosedInterval = "both", - time_unit: TimeUnit | None = None, - time_zone: str | None = None, eager: bool = False, ) -> Series | Expr: """ @@ -246,14 +206,9 @@ def date_ranges( interval Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). + Must consist of full days. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). - time_unit : {None, 'ns', 'us', 'ms'} - Time unit of the resulting `Datetime` data type. - Only takes effect if the output column is of type `Datetime`. - time_zone - Time zone of the resulting `Datetime` data type. - Only takes effect if the output column is of type `Datetime`. eager Evaluate immediately and return a `Series`. If set to `False` (default), return an expression instead. @@ -261,18 +216,17 @@ def date_ranges( Returns ------- Expr or Series - Column of data type `List(Date)` or `List(Datetime)`. + Column of data type `List(Date)`. + + See Also + -------- + date_range + datetime_ranges Notes ----- `interval` is created according to the following string language: - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) @@ -280,7 +234,7 @@ def date_ranges( - 1y (1 calendar year) Or combine them: - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds + "1w2d" # 1 week, 2 days By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", @@ -308,45 +262,12 @@ def date_ranges( └────────────┴────────────┴──────────────────────────────────────┘ """ interval = parse_interval_argument(interval) - if time_unit is None and "ns" in interval: - time_unit = "ns" - - _warn_for_deprecated_date_range_use(start, end, interval, time_unit, time_zone) - start_pyexpr = parse_as_expression(start) end_pyexpr = parse_as_expression(end) - result = wrap_expr( - plr.date_ranges( - start_pyexpr, end_pyexpr, interval, closed, time_unit, time_zone - ) - ) + result = wrap_expr(plr.date_ranges(start_pyexpr, end_pyexpr, interval, closed)) if eager: return F.select(result).to_series() return result - - -def _warn_for_deprecated_date_range_use( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str, - time_unit: TimeUnit | None, - time_zone: str | None, -) -> None: - # This check is not foolproof, but should catch most cases - if ( - isinstance(start, datetime) - or isinstance(end, datetime) - or time_unit is not None - or time_zone is not None - or ("h" in interval) - or ("m" in interval.replace("mo", "")) - or ("s" in interval.replace("saturating", "")) - ): - issue_deprecation_warning( - "Creating Datetime ranges using `date_range(s)` is deprecated." - " Use `datetime_range(s)` instead.", - version="0.19.3", - ) diff --git a/py-polars/polars/functions/range/datetime_range.py b/py-polars/polars/functions/range/datetime_range.py index 3c826d8ee2bd..b952d2c5c0b6 100644 --- a/py-polars/polars/functions/range/datetime_range.py +++ b/py-polars/polars/functions/range/datetime_range.py @@ -95,6 +95,11 @@ def datetime_range( Expr or Series Column of data type :class:`Datetime`. + See Also + -------- + datetime_ranges + date_range + Notes ----- `interval` is created according to the following string language: @@ -290,6 +295,11 @@ def datetime_ranges( Expr or Series Column of data type `List(Datetime)`. + See Also + -------- + datetime_range + date_ranges + Examples -------- >>> from datetime import datetime diff --git a/py-polars/src/functions/range.rs b/py-polars/src/functions/range.rs index e6ab7593ca9f..ce725dda4ca4 100644 --- a/py-polars/src/functions/range.rs +++ b/py-polars/src/functions/range.rs @@ -69,34 +69,28 @@ pub fn int_ranges( pub fn date_range( start: PyExpr, end: PyExpr, - every: &str, + interval: &str, closed: Wrap, - time_unit: Option>, - time_zone: Option, ) -> PyExpr { let start = start.inner; let end = end.inner; - let every = Duration::parse(every); + let interval = Duration::parse(interval); let closed = closed.0; - let time_unit = time_unit.map(|x| x.0); - dsl::date_range(start, end, every, closed, time_unit, time_zone).into() + dsl::date_range(start, end, interval, closed).into() } #[pyfunction] pub fn date_ranges( start: PyExpr, end: PyExpr, - every: &str, + interval: &str, closed: Wrap, - time_unit: Option>, - time_zone: Option, ) -> PyExpr { let start = start.inner; let end = end.inner; - let every = Duration::parse(every); + let interval = Duration::parse(interval); let closed = closed.0; - let time_unit = time_unit.map(|x| x.0); - dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into() + dsl::date_ranges(start, end, interval, closed).into() } #[pyfunction] diff --git a/py-polars/tests/unit/functions/range/test_date_range.py b/py-polars/tests/unit/functions/range/test_date_range.py index 92753e53f2eb..c48cf8437abb 100644 --- a/py-polars/tests/unit/functions/range/test_date_range.py +++ b/py-polars/tests/unit/functions/range/test_date_range.py @@ -10,7 +10,7 @@ from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: - from polars.type_aliases import ClosedInterval, TimeUnit + from polars.type_aliases import ClosedInterval def test_date_range() -> None: @@ -201,109 +201,6 @@ def test_date_range_eager() -> None: assert_series_equal(result, expected) -@pytest.mark.parametrize( - ( - "input_time_unit", - "input_time_zone", - "expected_date_range", - ), - [ - (None, None, ["2020-01-01", "2020-01-02", "2020-01-03"]), - ], -) -def test_date_range_schema_no_upcast( - input_time_unit: TimeUnit | None, - input_time_zone: str | None, - expected_date_range: list[str], -) -> None: - output_dtype = pl.Date - interval = "1d" - - df = pl.DataFrame({"start": [date(2020, 1, 1)], "end": [date(2020, 1, 3)]}).lazy() - result = df.with_columns( - pl.date_ranges( - pl.col("start"), - pl.col("end"), - interval=interval, - time_unit=input_time_unit, - time_zone=input_time_zone, - ).alias("date_range") - ) - expected_schema = { - "start": pl.Date, - "end": pl.Date, - "date_range": pl.List(output_dtype), - } - assert result.schema == expected_schema - assert result.collect().schema == expected_schema - - expected = pl.DataFrame( - { - "start": [date(2020, 1, 1)], - "end": [date(2020, 1, 3)], - "date_range": pl.Series(expected_date_range) - .str.to_datetime(time_unit="ns") - .implode(), - } - ).with_columns( - pl.col("date_range").explode().cast(output_dtype).implode(), - ) - assert_frame_equal(result.collect(), expected) - - -@pytest.mark.parametrize( - ( - "input_time_unit", - "input_time_zone", - "expected_date_range", - ), - [ - ("ms", None, ["2020-01-01", "2020-01-02", "2020-01-03"]), - (None, "Asia/Kathmandu", ["2020-01-01", "2020-01-02", "2020-01-03"]), - ("ms", "Asia/Kathmandu", ["2020-01-01", "2020-01-02", "2020-01-03"]), - ], -) -def test_date_range_schema_no_upcast2( - input_time_unit: TimeUnit | None, - input_time_zone: str | None, - expected_date_range: list[str], -) -> None: - output_dtype = pl.Date - interval = "1d" - - df = pl.DataFrame({"start": [date(2020, 1, 1)], "end": [date(2020, 1, 3)]}).lazy() - with pytest.deprecated_call(): - result = df.with_columns( - pl.date_ranges( - pl.col("start"), - pl.col("end"), - interval=interval, - time_unit=input_time_unit, - time_zone=input_time_zone, - ).alias("date_range") - ) - expected_schema = { - "start": pl.Date, - "end": pl.Date, - "date_range": pl.List(output_dtype), - } - assert result.schema == expected_schema - assert result.collect().schema == expected_schema - - expected = pl.DataFrame( - { - "start": [date(2020, 1, 1)], - "end": [date(2020, 1, 3)], - "date_range": pl.Series(expected_date_range) - .str.to_datetime(time_unit="ns") - .implode(), - } - ).with_columns( - pl.col("date_range").explode().cast(output_dtype).implode(), - ) - assert_frame_equal(result.collect(), expected) - - def test_date_range_input_shape_empty() -> None: empty = pl.Series(dtype=pl.Datetime) single = pl.Series([datetime(2022, 1, 2)]) @@ -346,17 +243,12 @@ def test_date_range_start_later_than_end() -> None: assert_series_equal(result, expected) -def test_date_range_24h_interval_results_in_datetime() -> None: - with pytest.deprecated_call(): - result = pl.LazyFrame().select( - date=pl.date_range(date(2022, 1, 1), date(2022, 1, 3), interval="24h") - ) - - assert result.schema == {"date": pl.Datetime} - expected = pl.Series( - "date", [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)] - ) - assert_series_equal(result.collect().to_series(), expected) +def test_date_range_24h_interval_raises() -> None: + with pytest.raises( + pl.ComputeError, + match="`interval` input for `date_range` must consist of full days", + ): + pl.date_range(date(2022, 1, 1), date(2022, 1, 3), interval="24h", eager=True) def test_long_date_range_12461() -> None: @@ -397,3 +289,23 @@ def test_date_ranges_broadcasting_fail() -> None: pl.ComputeError, match=r"lengths of `start` \(3\) and `end` \(2\) do not match" ): pl.date_ranges(start, end, eager=True) + + +def test_date_range_datetime_input() -> None: + result = pl.date_range( + datetime(2022, 1, 1, 12), datetime(2022, 1, 3), interval="1d", eager=True + ) + expected = pl.Series( + "literal", [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)] + ) + assert_series_equal(result, expected) + + +def test_date_ranges_datetime_input() -> None: + result = pl.date_ranges( + datetime(2022, 1, 1, 12), datetime(2022, 1, 3), interval="1d", eager=True + ) + expected = pl.Series( + "literal", [[date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]] + ) + assert_series_equal(result, expected)