diff --git a/crates/polars-time/src/upsample.rs b/crates/polars-time/src/upsample.rs
index 73e3dc529c312..692f1a35744cf 100644
--- a/crates/polars-time/src/upsample.rs
+++ b/crates/polars-time/src/upsample.rs
@@ -43,7 +43,6 @@ pub trait PolarsUpsample {
         by: I,
         time_column: &str,
         every: Duration,
-        offset: Duration,
     ) -> PolarsResult<DataFrame>;
 
     /// Upsample a [`DataFrame`] at a regular frequency.
@@ -85,7 +84,6 @@ pub trait PolarsUpsample {
         by: I,
         time_column: &str,
         every: Duration,
-        offset: Duration,
     ) -> PolarsResult<DataFrame>;
 }
 
@@ -95,13 +93,11 @@ impl PolarsUpsample for DataFrame {
         by: I,
         time_column: &str,
         every: Duration,
-        offset: Duration,
     ) -> PolarsResult<DataFrame> {
         let by = by.into_vec();
         let time_type = self.column(time_column)?.dtype();
-        ensure_duration_matches_data_type(offset, time_type, "offset")?;
         ensure_duration_matches_data_type(every, time_type, "every")?;
-        upsample_impl(self, by, time_column, every, offset, false)
+        upsample_impl(self, by, time_column, every, false)
     }
 
     fn upsample_stable<I: IntoVec<SmartString>>(
@@ -109,13 +105,11 @@ impl PolarsUpsample for DataFrame {
         by: I,
         time_column: &str,
         every: Duration,
-        offset: Duration,
     ) -> PolarsResult<DataFrame> {
         let by = by.into_vec();
         let time_type = self.column(time_column)?.dtype();
-        ensure_duration_matches_data_type(offset, time_type, "offset")?;
         ensure_duration_matches_data_type(every, time_type, "every")?;
-        upsample_impl(self, by, time_column, every, offset, true)
+        upsample_impl(self, by, time_column, every, true)
     }
 }
 
@@ -124,7 +118,6 @@ fn upsample_impl(
     by: Vec<SmartString>,
     index_column: &str,
     every: Duration,
-    offset: Duration,
     stable: bool,
 ) -> PolarsResult<DataFrame> {
     let s = source.column(index_column)?;
@@ -137,7 +130,7 @@ fn upsample_impl(
                 .unwrap()
         })
         .unwrap();
-        let mut out = upsample_impl(&df, by, index_column, every, offset, stable)?;
+        let mut out = upsample_impl(&df, by, index_column, every, stable)?;
         out.apply(index_column, |s| s.cast(time_type).unwrap())
             .unwrap();
         Ok(out)
@@ -154,7 +147,7 @@ fn upsample_impl(
                 .unwrap()
         })
         .unwrap();
-        let mut out = upsample_impl(&df, by, index_column, every, offset, stable)?;
+        let mut out = upsample_impl(&df, by, index_column, every, stable)?;
         out.apply(index_column, |s| s.cast(time_type).unwrap())
             .unwrap();
         Ok(out)
@@ -165,13 +158,13 @@ fn upsample_impl(
                 .unwrap()
         })
         .unwrap();
-        let mut out = upsample_impl(&df, by, index_column, every, offset, stable)?;
+        let mut out = upsample_impl(&df, by, index_column, every, stable)?;
         out.apply(index_column, |s| s.cast(time_type).unwrap())
             .unwrap();
         Ok(out)
     } else if by.is_empty() {
         let index_column = source.column(index_column)?;
-        upsample_single_impl(source, index_column, every, offset)
+        upsample_single_impl(source, index_column, every)
     } else {
         let gb = if stable {
             source.group_by_stable(by)
@@ -181,7 +174,7 @@ fn upsample_impl(
         // don't parallelize this, this may SO on large data.
         gb?.apply(|df| {
             let index_column = df.column(index_column)?;
-            upsample_single_impl(&df, index_column, every, offset)
+            upsample_single_impl(&df, index_column, every)
         })
     }
 }
 
@@ -190,7 +183,6 @@ fn upsample_single_impl(
     source: &DataFrame,
     index_column: &Series,
     every: Duration,
-    offset: Duration,
 ) -> PolarsResult<DataFrame> {
     let index_col_name = index_column.name();
 
@@ -208,11 +200,6 @@ fn upsample_single_impl(
             Some(tz) => Some(parse_time_zone(tz)?),
             _ => None,
         };
-        let first = match tu {
-            TimeUnit::Nanoseconds => offset.add_ns(first, tz.as_ref())?,
-            TimeUnit::Microseconds => offset.add_us(first, tz.as_ref())?,
-            TimeUnit::Milliseconds => offset.add_ms(first, tz.as_ref())?,
-        };
         let range = datetime_range_impl(
             index_col_name,
             first,
diff --git a/docs/src/rust/user-guide/transformations/time-series/resampling.rs b/docs/src/rust/user-guide/transformations/time-series/resampling.rs
index c99aebfd55f9f..e1cd4baa16824 100644
--- a/docs/src/rust/user-guide/transformations/time-series/resampling.rs
+++ b/docs/src/rust/user-guide/transformations/time-series/resampling.rs
@@ -31,7 +31,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     // --8<-- [start:upsample]
     let out1 = df
         .clone()
-        .upsample::<[String; 0]>([], "time", Duration::parse("15m"), Duration::parse("0"))?
+        .upsample::<[String; 0]>([], "time", Duration::parse("15m"))?
         .fill_null(FillNullStrategy::Forward(None))?;
     println!("{}", &out1);
     // --8<-- [end:upsample]
@@ -39,7 +39,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     // --8<-- [start:upsample2]
     let out2 = df
         .clone()
-        .upsample::<[String; 0]>([], "time", Duration::parse("15m"), Duration::parse("0"))?
+        .upsample::<[String; 0]>([], "time", Duration::parse("15m"))?
         .lazy()
         .with_columns([col("values").interpolate(InterpolationMethod::Linear)])
         .collect()?
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index e90aacf013a2f..afcc3ba7cb2a6 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -6003,15 +6003,13 @@ def upsample(
         time_column: str,
         *,
         every: str | timedelta,
-        offset: str | timedelta | None = None,
         group_by: str | Sequence[str] | None = None,
         maintain_order: bool = False,
     ) -> Self:
         """
         Upsample a DataFrame at a regular frequency.
 
-        The `every` and `offset` arguments are created with
-        the following string language:
+        The `every` argument is created with the following string language:
 
         - 1ns (1 nanosecond)
         - 1us (1 microsecond)
@@ -6042,12 +6040,6 @@ def upsample(
         Parameters
         ----------
         time_column
             Note that this column has to be sorted for the output to make sense.
         every
             Interval will start 'every' duration.
-        offset
-            Change the start of the date_range by this offset.
-
-            .. deprecated:: 0.20.19
-                This argument is deprecated and will be removed in the next breaking
-                release. Instead, chain `upsample` with `dt.offset_by`.
         group_by
             First group by these columns and then upsample for every group.
         maintain_order
@@ -6094,24 +6086,15 @@ def upsample(
         │ 2021-06-01 00:00:00 ┆ B      ┆ 3      │
         └─────────────────────┴────────┴────────┘
         """
-        if offset is not None:
-            issue_deprecation_warning(
-                "`offset` is deprecated and will be removed in the next breaking release. "
" - "Instead, chain `upsample` with `dt.offset_by`.", - version="0.20.19", - ) if group_by is None: group_by = [] if isinstance(group_by, str): group_by = [group_by] - if offset is None: - offset = "0ns" every = parse_as_duration_string(every) - offset = parse_as_duration_string(offset) return self._from_pydf( - self._df.upsample(group_by, time_column, every, offset, maintain_order) + self._df.upsample(group_by, time_column, every, maintain_order) ) def join_asof( diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 13230f06a3076..7ef41add6ea75 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -9175,7 +9175,7 @@ def ewm_mean( alpha: float | None = None, adjust: bool = True, min_periods: int = 1, - ignore_nulls: bool | None = None, + ignore_nulls: bool = False, ) -> Self: r""" Exponentially-weighted moving average. @@ -9218,7 +9218,7 @@ def ewm_mean( ignore_nulls Ignore missing values when calculating weights. - - When `ignore_nulls=False`, weights are based on absolute + - When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of @@ -9226,7 +9226,7 @@ def ewm_mean( :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`. - - When `ignore_nulls=True` (current default), weights are based + - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are @@ -9248,16 +9248,6 @@ def ewm_mean( │ 2.428571 │ └──────────┘ """ - if ignore_nulls is None: - issue_deprecation_warning( - "The default value for `ignore_nulls` for `ewm` methods" - " will change from True to False in the next breaking release." - " Explicitly set `ignore_nulls=True` to keep the existing behavior" - " and silence this warning.", - version="0.20.11", - ) - ignore_nulls = True - alpha = _prepare_alpha(com, span, half_life, alpha) return self._from_pyexpr( self._pyexpr.ewm_mean(alpha, adjust, min_periods, ignore_nulls) @@ -9377,7 +9367,7 @@ def ewm_std( adjust: bool = True, bias: bool = False, min_periods: int = 1, - ignore_nulls: bool | None = None, + ignore_nulls: bool = False, ) -> Self: r""" Exponentially-weighted moving standard deviation. @@ -9423,7 +9413,7 @@ def ewm_std( ignore_nulls Ignore missing values when calculating weights. - - When `ignore_nulls=False`, weights are based on absolute + - When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of @@ -9431,7 +9421,7 @@ def ewm_std( :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`. - - When `ignore_nulls=True` (current default), weights are based + - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are @@ -9453,16 +9443,6 @@ def ewm_std( │ 0.963624 │ └──────────┘ """ - if ignore_nulls is None: - issue_deprecation_warning( - "The default value for `ignore_nulls` for `ewm` methods" - " will change from True to False in the next breaking release." 
- " Explicitly set `ignore_nulls=True` to keep the existing behavior" - " and silence this warning.", - version="0.20.11", - ) - ignore_nulls = True - alpha = _prepare_alpha(com, span, half_life, alpha) return self._from_pyexpr( self._pyexpr.ewm_std(alpha, adjust, bias, min_periods, ignore_nulls) @@ -9478,7 +9458,7 @@ def ewm_var( adjust: bool = True, bias: bool = False, min_periods: int = 1, - ignore_nulls: bool | None = None, + ignore_nulls: bool = False, ) -> Self: r""" Exponentially-weighted moving variance. @@ -9524,7 +9504,7 @@ def ewm_var( ignore_nulls Ignore missing values when calculating weights. - - When `ignore_nulls=False`, weights are based on absolute + - When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of @@ -9532,7 +9512,7 @@ def ewm_var( :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`. - - When `ignore_nulls=True` (current default), weights are based + - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are @@ -9554,16 +9534,6 @@ def ewm_var( │ 0.928571 │ └──────────┘ """ - if ignore_nulls is None: - issue_deprecation_warning( - "The default value for `ignore_nulls` for `ewm` methods" - " will change from True to False in the next breaking release." - " Explicitly set `ignore_nulls=True` to keep the existing behavior" - " and silence this warning.", - version="0.20.11", - ) - ignore_nulls = True - alpha = _prepare_alpha(com, span, half_life, alpha) return self._from_pyexpr( self._pyexpr.ewm_var(alpha, adjust, bias, min_periods, ignore_nulls) diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index d186d8745af84..d2a72a557b6b8 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -17,7 +17,7 @@ from polars._utils.unstable import issue_unstable_warning, unstable from polars._utils.various import extend_bool from polars._utils.wrap import wrap_df, wrap_expr -from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64, UInt32 +from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64 with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr @@ -209,17 +209,6 @@ def cum_count(*columns: str, reverse: bool = False) -> Expr: │ 2 │ └─────┘ """ - if not columns: - issue_deprecation_warning( - "`pl.cum_count()` is deprecated. The same result can be achieved using" - " `pl.int_range(1, pl.len() + 1, dtype=pl.UInt32)`," - " or `int_range(pl.len(), 0, -1, dtype=pl.UInt32)` when `reverse=True`.", - version="0.20.5", - ) - if reverse: - return F.int_range(F.len(), 0, step=-1, dtype=UInt32).alias("cum_count") - else: - return F.int_range(1, F.len() + 1, dtype=UInt32).alias("cum_count") return F.col(*columns).cum_count(reverse=reverse) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index bc4c4f603c8ba..75b19c3e046ff 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -194,14 +194,6 @@ class Series: nan_to_null : bool, default False In case a numpy array is used to create this Series, indicate how to deal with np.nan values. (This parameter is a no-op on non-numpy data). 
-    dtype_if_empty : DataType, default Null
-        Data type of the Series if `values` contains no non-null data.
-
-        .. deprecated:: 0.20.6
-            The data type for empty Series will always be `Null`, unless `dtype` is
-            specified. To preserve behavior, check if the resulting Series has data type
-            `Null` and cast to the desired data type.
-            This parameter will be removed in the next breaking release.
 
     Examples
     --------
@@ -269,17 +261,7 @@ def __init__(
         *,
         strict: bool = True,
         nan_to_null: bool = False,
-        dtype_if_empty: PolarsDataType = Null,
     ):
-        if dtype_if_empty != Null:
-            issue_deprecation_warning(
-                "The `dtype_if_empty` parameter for the Series constructor is deprecated."
-                " The data type for empty Series will always be Null, unless `dtype` is specified."
-                " To preserve behavior, check if the resulting Series has data type Null and cast to the desired data type."
-                " This parameter will be removed in the next breaking release.",
-                version="0.20.6",
-            )
-
         # If 'Unknown' treat as None to trigger type inference
         if dtype == Unknown:
             dtype = None
@@ -371,10 +353,6 @@ def __init__(
             )
             raise TypeError(msg)
 
-        # Implementation of deprecated `dtype_if_empty` functionality
-        if dtype_if_empty != Null and self.dtype == Null:
-            self._s = self._s.cast(dtype_if_empty, False)
-
     @classmethod
     def _from_pyseries(cls, pyseries: PySeries) -> Self:
         series = cls.__new__(cls)
@@ -6898,7 +6876,7 @@ def ewm_mean(
         alpha: float | None = None,
         adjust: bool = True,
         min_periods: int = 1,
-        ignore_nulls: bool | None = None,
+        ignore_nulls: bool = False,
     ) -> Series:
         r"""
         Exponentially-weighted moving average.
@@ -6941,7 +6919,7 @@ def ewm_mean(
         ignore_nulls
             Ignore missing values when calculating weights.
 
-            - When `ignore_nulls=False`, weights are based on absolute
+            - When `ignore_nulls=False` (default), weights are based on absolute
              positions. For example, the weights of
              :math:`x_0` and :math:`x_2` used in
              calculating the final weighted average of
@@ -6949,7 +6927,7 @@ def ewm_mean(
              :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and
              :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`.
 
-            - When `ignore_nulls=True` (current default), weights are based
+            - When `ignore_nulls=True`, weights are based
              on relative positions. For example, the weights of
              :math:`x_0` and :math:`x_2` used in calculating the final weighted
              average of [:math:`x_0`, None, :math:`x_2`] are
@@ -7065,7 +7043,7 @@ def ewm_std(
         adjust: bool = True,
         bias: bool = False,
         min_periods: int = 1,
-        ignore_nulls: bool | None = None,
+        ignore_nulls: bool = False,
     ) -> Series:
         r"""
         Exponentially-weighted moving standard deviation.
@@ -7111,7 +7089,7 @@ def ewm_std(
         ignore_nulls
             Ignore missing values when calculating weights.
 
-            - When `ignore_nulls=False`, weights are based on absolute
+            - When `ignore_nulls=False` (default), weights are based on absolute
              positions. For example, the weights of
              :math:`x_0` and :math:`x_2` used in
              calculating the final weighted average of
@@ -7119,7 +7097,7 @@ def ewm_std(
              :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and
              :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`.
 
-            - When `ignore_nulls=True` (current default), weights are based
+            - When `ignore_nulls=True`, weights are based
              on relative positions. For example, the weights of
              :math:`x_0` and :math:`x_2` used in calculating the final weighted
              average of [:math:`x_0`, None, :math:`x_2`] are
@@ -7149,7 +7127,7 @@ def ewm_var(
         adjust: bool = True,
         bias: bool = False,
         min_periods: int = 1,
-        ignore_nulls: bool | None = None,
+        ignore_nulls: bool = False,
     ) -> Series:
         r"""
         Exponentially-weighted moving variance.
@@ -7195,7 +7173,7 @@ def ewm_var(
         ignore_nulls
             Ignore missing values when calculating weights.
 
-            - When `ignore_nulls=False`, weights are based on absolute
+            - When `ignore_nulls=False` (default), weights are based on absolute
              positions. For example, the weights of
              :math:`x_0` and :math:`x_2` used in
              calculating the final weighted average of
@@ -7203,7 +7181,7 @@ def ewm_var(
              :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and
              :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`.
 
-            - When `ignore_nulls=True` (current default), weights are based
+            - When `ignore_nulls=True`, weights are based
              on relative positions. For example, the weights of
              :math:`x_0` and :math:`x_2` used in calculating the final weighted
              average of [:math:`x_0`, None, :math:`x_2`] are
diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs
index af96d67eb66a0..fef05d9350ffd 100644
--- a/py-polars/src/dataframe/general.rs
+++ b/py-polars/src/dataframe/general.rs
@@ -590,23 +590,13 @@ impl PyDataFrame {
         by: Vec<String>,
         index_column: &str,
         every: &str,
-        offset: &str,
         stable: bool,
     ) -> PyResult<Self> {
         let out = if stable {
-            self.df.upsample_stable(
-                by,
-                index_column,
-                Duration::parse(every),
-                Duration::parse(offset),
-            )
+            self.df
+                .upsample_stable(by, index_column, Duration::parse(every))
         } else {
-            self.df.upsample(
-                by,
-                index_column,
-                Duration::parse(every),
-                Duration::parse(offset),
-            )
+            self.df.upsample(by, index_column, Duration::parse(every))
         };
         let out = out.map_err(PyPolarsErr::from)?;
         Ok(out.into())
     }
diff --git a/py-polars/tests/unit/dataframe/test_upsample.py b/py-polars/tests/unit/dataframe/test_upsample.py
new file mode 100644
index 0000000000000..a280301faae5c
--- /dev/null
+++ b/py-polars/tests/unit/dataframe/test_upsample.py
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import TYPE_CHECKING
+
+import pytest
+
+import polars as pl
+from polars.testing import assert_frame_equal
+
+if TYPE_CHECKING:
+    from datetime import timezone
+
+    from zoneinfo import ZoneInfo
+
+    from polars.type_aliases import FillNullStrategy, PolarsIntegerType
+else:
+    from polars._utils.convert import string_to_zoneinfo as ZoneInfo
+
+
+@pytest.mark.parametrize(
+    ("time_zone", "tzinfo"),
+    [
+        (None, None),
+        ("Europe/Warsaw", ZoneInfo("Europe/Warsaw")),
+    ],
+)
+def test_upsample(time_zone: str | None, tzinfo: ZoneInfo | timezone | None) -> None:
+    df = pl.DataFrame(
+        {
+            "time": [
+                datetime(2021, 2, 1),
+                datetime(2021, 4, 1),
+                datetime(2021, 5, 1),
+                datetime(2021, 6, 1),
+            ],
+            "admin": ["Åland", "Netherlands", "Åland", "Netherlands"],
+            "test2": [0, 1, 2, 3],
+        }
+    ).with_columns(pl.col("time").dt.replace_time_zone(time_zone).set_sorted())
+
+    up = df.upsample(
+        time_column="time",
+        every="1mo",
+        group_by="admin",
+        maintain_order=True,
+    ).select(pl.all().forward_fill())
+
+    # this print will panic if timezones feature is not activated
+    # don't remove
+    print(up)
+
+    expected = pl.DataFrame(
+        {
+            "time": [
+                datetime(2021, 2, 1, 0, 0),
+                datetime(2021, 3, 1, 0, 0),
+                datetime(2021, 4, 1, 0, 0),
+                datetime(2021, 5, 1, 0, 0),
+                datetime(2021, 4, 1, 0, 0),
+                datetime(2021, 5, 1, 0, 0),
+                datetime(2021, 6, 1, 0, 0),
+            ],
+            "admin": [
+                "Åland",
+                "Åland",
+                "Åland",
+                "Åland",
+                "Netherlands",
+                "Netherlands",
+                "Netherlands",
+            ],
+            "test2": [0, 0, 0, 2, 1, 1, 3],
+        }
+    )
+    expected = expected.with_columns(pl.col("time").dt.replace_time_zone(time_zone))
+
+    assert_frame_equal(up, expected)
+
+
+@pytest.mark.parametrize("time_zone", [None, "US/Central"])
+def test_upsample_crossing_dst(time_zone: str | None) -> None:
+    df = pl.DataFrame(
+        {
+            "time": pl.datetime_range(
+                datetime(2021, 11, 6),
+                datetime(2021, 11, 8),
+                time_zone=time_zone,
+                eager=True,
+            ),
+            "values": [1, 2, 3],
+        }
+    )
+
+    result = df.upsample(time_column="time", every="1d")
+
+    expected = pl.DataFrame(
+        {
+            "time": [
+                datetime(2021, 11, 6),
+                datetime(2021, 11, 7),
+                datetime(2021, 11, 8),
+            ],
+            "values": [1, 2, 3],
+        }
+    ).with_columns(pl.col("time").dt.replace_time_zone(time_zone))
+
+    assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    ("time_zone", "tzinfo"),
+    [
+        (None, None),
+        ("Pacific/Rarotonga", ZoneInfo("Pacific/Rarotonga")),
+    ],
+)
+def test_upsample_time_zones(
+    time_zone: str | None, tzinfo: timezone | ZoneInfo | None
+) -> None:
+    df = pl.DataFrame(
+        {
+            "time": pl.datetime_range(
+                start=datetime(2021, 12, 16),
+                end=datetime(2021, 12, 16, 3),
+                interval="30m",
+                eager=True,
+            ),
+            "groups": ["a", "a", "a", "b", "b", "a", "a"],
+            "values": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
+        }
+    )
+    expected = pl.DataFrame(
+        {
+            "time": [
+                datetime(2021, 12, 16, 0, 0),
+                datetime(2021, 12, 16, 1, 0),
+                datetime(2021, 12, 16, 2, 0),
+                datetime(2021, 12, 16, 3, 0),
+            ],
+            "groups": ["a", "a", "b", "a"],
+            "values": [1.0, 3.0, 5.0, 7.0],
+        }
+    )
+    df = df.with_columns(pl.col("time").dt.replace_time_zone(time_zone))
+    expected = expected.with_columns(pl.col("time").dt.replace_time_zone(time_zone))
+    result = df.upsample(time_column="time", every="60m").fill_null(strategy="forward")
+    assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    ("every", "fill", "expected_index", "expected_groups"),
+    [
+        (
+            "1i",
+            "forward",
+            [1, 2, 3, 4] + [5, 6, 7],
+            ["a"] * 4 + ["b"] * 3,
+        ),
+        (
+            "1i",
+            "backward",
+            [1, 2, 3, 4] + [5, 6, 7],
+            ["a"] * 4 + ["b"] * 3,
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [pl.Int32, pl.Int64, pl.UInt32, pl.UInt64])
+def test_upsample_index(
+    every: str,
+    fill: FillNullStrategy | None,
+    expected_index: list[int],
+    expected_groups: list[str],
+    dtype: PolarsIntegerType,
+) -> None:
+    df = (
+        pl.DataFrame(
+            {
+                "index": [1, 2, 4] + [5, 7],
+                "groups": ["a"] * 3 + ["b"] * 2,
+            }
+        )
+        .with_columns(pl.col("index").cast(dtype))
+        .set_sorted("index")
+    )
+    expected = pl.DataFrame(
+        {
+            "index": expected_index,
+            "groups": expected_groups,
+        }
+    ).with_columns(pl.col("index").cast(dtype))
+    result = (
+        df.upsample(time_column="index", group_by="groups", every=every)
+        .fill_null(strategy=fill)
+        .sort(["groups", "index"])
+    )
+    assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("maintain_order", [True, False])
+def test_upsample_index_invalid(
+    df: pl.DataFrame,
+    maintain_order: bool,
+) -> None:
+    df = pl.DataFrame(
+        {
+            "index": [1, 2, 4, 5, 7],
+            "groups": ["a"] * 3 + ["b"] * 2,
+        }
+    ).set_sorted("index")
+
+    with pytest.raises(pl.InvalidOperationError, match=r"must be a parsed integer"):
+        df.upsample(
+            time_column="index",
+            every="1h",
+            maintain_order=maintain_order,
+        )
diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py
index 3b53b07ac4e82..bcfb024b23ee1 100644
--- a/py-polars/tests/unit/datatypes/test_temporal.py
+++ b/py-polars/tests/unit/datatypes/test_temporal.py
@@ -29,8 +29,6 @@
 from polars.type_aliases import (
     Ambiguous,
-    FillNullStrategy,
-    PolarsIntegerType,
     PolarsTemporalType,
     TimeUnit,
 )
@@ -536,280 +534,6 @@ def test_explode_date() -> None:
     ]
 
 
-@pytest.mark.parametrize(
-    ("time_zone", "tzinfo"),
-    [
-        (None, None),
-        ("Europe/Warsaw", ZoneInfo("Europe/Warsaw")),
-    ],
-)
-@pytest.mark.parametrize("offset", [None, "1mo"])
-def test_upsample(
-    time_zone: str | None, tzinfo: ZoneInfo | timezone | None, offset: None | str
-) -> None:
-    df = pl.DataFrame(
-        {
-            "time": [
-                datetime(2021, 2, 1),
-                datetime(2021, 4, 1),
-                datetime(2021, 5, 1),
-                datetime(2021, 6, 1),
-            ],
-            "admin": ["Åland", "Netherlands", "Åland", "Netherlands"],
-            "test2": [0, 1, 2, 3],
-        }
-    ).with_columns(pl.col("time").dt.replace_time_zone(time_zone).set_sorted())
-
-    context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None]
-    msg = (
-        "`offset` is deprecated and will be removed in the next breaking release. "
-        "Instead, chain `upsample` with `dt.offset_by`."
-    )
-    if offset is not None:
-        context_manager = pytest.deprecated_call(match=msg)
-    else:
-        context_manager = contextlib.nullcontext()
-
-    with context_manager:
-        up = df.upsample(
-            time_column="time",
-            every="1mo",
-            group_by="admin",
-            maintain_order=True,
-            offset=offset,
-        ).select(pl.all().forward_fill())
-    # this print will panic if timezones feature is not activated
-    # don't remove
-    print(up)
-
-    if offset is not None:
-        expected = pl.DataFrame(
-            {
-                "time": [
-                    datetime(2021, 3, 1, 0, 0),
-                    datetime(2021, 4, 1, 0, 0),
-                    datetime(2021, 5, 1, 0, 0),
-                    datetime(2021, 5, 1, 0, 0),
-                    datetime(2021, 6, 1, 0, 0),
-                ],
-                "admin": [None, None, "Åland", "Åland", "Netherlands"],
-                "test2": [None, None, 2, 2, 3],
-            }
-        )
-    else:
-        expected = pl.DataFrame(
-            {
-                "time": [
-                    datetime(2021, 2, 1, 0, 0),
-                    datetime(2021, 3, 1, 0, 0),
-                    datetime(2021, 4, 1, 0, 0),
-                    datetime(2021, 5, 1, 0, 0),
-                    datetime(2021, 4, 1, 0, 0),
-                    datetime(2021, 5, 1, 0, 0),
-                    datetime(2021, 6, 1, 0, 0),
-                ],
-                "admin": [
-                    "Åland",
-                    "Åland",
-                    "Åland",
-                    "Åland",
-                    "Netherlands",
-                    "Netherlands",
-                    "Netherlands",
-                ],
-                "test2": [0, 0, 0, 2, 1, 1, 3],
-            }
-        )
-    expected = expected.with_columns(pl.col("time").dt.replace_time_zone(time_zone))
-
-    assert_frame_equal(up, expected)
-
-
-@pytest.mark.parametrize("time_zone", [None, "US/Central"])
-@pytest.mark.parametrize(
-    ("offset", "expected_time", "expected_values"),
-    [
-        (
-            None,
-            [datetime(2021, 11, 6), datetime(2021, 11, 7), datetime(2021, 11, 8)],
-            [1, 2, 3],
-        ),
-        ("1d", [datetime(2021, 11, 7), datetime(2021, 11, 8)], [2, 3]),
-    ],
-)
-def test_upsample_crossing_dst(
-    time_zone: str | None,
-    offset: str | None,
-    expected_time: list[datetime],
-    expected_values: list[int],
-) -> None:
-    df = pl.DataFrame(
-        {
-            "time": pl.datetime_range(
-                datetime(2021, 11, 6),
-                datetime(2021, 11, 8),
-                time_zone=time_zone,
-                eager=True,
-            ),
-            "values": [1, 2, 3],
-        }
-    )
-    context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None]
-    msg = (
-        "`offset` is deprecated and will be removed in the next breaking release. "
-        "Instead, chain `upsample` with `dt.offset_by`."
-    )
-    if offset is not None:
-        context_manager = pytest.deprecated_call(match=msg)
-    else:
-        context_manager = contextlib.nullcontext()
-
-    with context_manager:
-        result = df.upsample(time_column="time", every="1d", offset=offset)
-    expected = pl.DataFrame(
-        {
-            "time": expected_time,
-            "values": expected_values,
-        }
-    ).with_columns(pl.col("time").dt.replace_time_zone(time_zone))
-    assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    ("time_zone", "tzinfo"),
-    [
-        (None, None),
-        ("Pacific/Rarotonga", ZoneInfo("Pacific/Rarotonga")),
-    ],
-)
-def test_upsample_time_zones(
-    time_zone: str | None, tzinfo: timezone | ZoneInfo | None
-) -> None:
-    df = pl.DataFrame(
-        {
-            "time": pl.datetime_range(
-                start=datetime(2021, 12, 16),
-                end=datetime(2021, 12, 16, 3),
-                interval="30m",
-                eager=True,
-            ),
-            "groups": ["a", "a", "a", "b", "b", "a", "a"],
-            "values": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
-        }
-    )
-    expected = pl.DataFrame(
-        {
-            "time": [
-                datetime(2021, 12, 16, 0, 0),
-                datetime(2021, 12, 16, 1, 0),
-                datetime(2021, 12, 16, 2, 0),
-                datetime(2021, 12, 16, 3, 0),
-            ],
-            "groups": ["a", "a", "b", "a"],
-            "values": [1.0, 3.0, 5.0, 7.0],
-        }
-    )
-    df = df.with_columns(pl.col("time").dt.replace_time_zone(time_zone))
-    expected = expected.with_columns(pl.col("time").dt.replace_time_zone(time_zone))
-    result = df.upsample(time_column="time", every="60m").fill_null(strategy="forward")
-    assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    ("every", "fill", "expected_index", "expected_groups"),
-    [
-        (
-            "1i",
-            "forward",
-            [1, 2, 3, 4] + [5, 6, 7],
-            ["a"] * 4 + ["b"] * 3,
-        ),
-        (
-            "1i",
-            "backward",
-            [1, 2, 3, 4] + [5, 6, 7],
-            ["a"] * 4 + ["b"] * 3,
-        ),
-    ],
-)
-@pytest.mark.parametrize("dtype", [pl.Int32, pl.Int64, pl.UInt32, pl.UInt64])
-def test_upsample_index(
-    every: str,
-    fill: FillNullStrategy | None,
-    expected_index: list[int],
-    expected_groups: list[str],
-    dtype: PolarsIntegerType,
-) -> None:
-    df = (
-        pl.DataFrame(
-            {
-                "index": [1, 2, 4] + [5, 7],
-                "groups": ["a"] * 3 + ["b"] * 2,
-            }
-        )
-        .with_columns(pl.col("index").cast(dtype))
-        .set_sorted("index")
-    )
-    expected = pl.DataFrame(
-        {
-            "index": expected_index,
-            "groups": expected_groups,
-        }
-    ).with_columns(pl.col("index").cast(dtype))
-    result = (
-        df.upsample(time_column="index", group_by="groups", every=every)
-        .fill_null(strategy=fill)
-        .sort(["groups", "index"])
-    )
-    assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    ("every", "offset"),
-    [
-        (
-            "1i",
-            "1h",
-        ),
-        (
-            "1h",
-            "1i",
-        ),
-        (
-            "1h",
-            "0i",
-        ),
-        (
-            "0i",
-            "1h",
-        ),
-    ],
-)
-@pytest.mark.parametrize("maintain_order", [True, False])
-def test_upsample_index_invalid(
-    df: pl.DataFrame,
-    every: str,
-    offset: str,
-    maintain_order: bool,
-) -> None:
-    df = pl.DataFrame(
-        {
-            "index": [1, 2, 4] + [5, 7],
-            "groups": ["a"] * 3 + ["b"] * 2,
-        }
-    ).set_sorted("index")
-    # On Python3.8, mypy complains about combining two context managers into a
-    # tuple, so we nest them instead.
-    with pytest.raises(pl.InvalidOperationError, match=r"must be a parsed integer"):  # noqa: SIM117
-        with pytest.deprecated_call():
-            df.upsample(
-                time_column="index",
-                every=every,
-                offset=offset,
-                maintain_order=maintain_order,
-            )
-
-
 def test_microseconds_accuracy() -> None:
     timestamps = [
         datetime(2600, 1, 1, 0, 0, 0, 123456),
diff --git a/py-polars/tests/unit/functions/test_cum_count.py b/py-polars/tests/unit/functions/test_cum_count.py
index b55f788812a1a..c780754af65c5 100644
--- a/py-polars/tests/unit/functions/test_cum_count.py
+++ b/py-polars/tests/unit/functions/test_cum_count.py
@@ -6,15 +6,6 @@
 from polars.testing import assert_frame_equal, assert_series_equal
 
 
-@pytest.mark.parametrize(("reverse", "output"), [(False, [1, 2, 3]), (True, [3, 2, 1])])
-def test_cum_count_no_args(reverse: bool, output: list[int]) -> None:
-    df = pl.DataFrame({"a": [5, 5, None]})
-    with pytest.deprecated_call():
-        result = df.select(pl.cum_count(reverse=reverse))
-    expected = pl.Series("cum_count", output, dtype=pl.UInt32).to_frame()
-    assert_frame_equal(result, expected)
-
-
 @pytest.mark.parametrize(("reverse", "output"), [(False, [1, 2, 2]), (True, [2, 1, 0])])
 def test_cum_count_single_arg(reverse: bool, output: list[int]) -> None:
     df = pl.DataFrame({"a": [5, 5, None]})
diff --git a/py-polars/tests/unit/operations/test_ewm.py b/py-polars/tests/unit/operations/test_ewm.py
index 715fb9cd04821..6818df8b8cd80 100644
--- a/py-polars/tests/unit/operations/test_ewm.py
+++ b/py-polars/tests/unit/operations/test_ewm.py
@@ -304,13 +304,3 @@ def test_ewm_methods(
     ewm_var_pl = s.ewm_var(bias=bias, **pl_params).fill_nan(None)
     ewm_var_pd = pl.Series(p.ewm(**pd_params).var(bias=bias))
     assert_series_equal(ewm_var_pl, ewm_var_pd, atol=1e-07)
-
-
-def test_ewm_ignore_nulls_deprecation() -> None:
-    s = pl.Series([1, None, 3])
-    with pytest.deprecated_call():
-        s.ewm_mean(com=1.0)
-    with pytest.deprecated_call():
-        s.ewm_std(com=1.0)
-    with pytest.deprecated_call():
-        s.ewm_var(com=1.0)
diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py
index 1ce5a74e897c8..c5a590507dce9 100644
--- a/py-polars/tests/unit/series/test_series.py
+++ b/py-polars/tests/unit/series/test_series.py
@@ -176,21 +176,6 @@ def test_init_inputs(monkeypatch: Any) -> None:
         pl.DataFrame(np.array([1, 2, 3]), schema=["a"])
 
 
-def test_init_dtype_if_empty_deprecated() -> None:
-    with pytest.deprecated_call():
-        assert pl.Series(dtype_if_empty=pl.String).dtype == pl.String
-    with pytest.deprecated_call():
-        assert pl.Series([], dtype_if_empty=pl.UInt16).dtype == pl.UInt16
-
-    with pytest.deprecated_call():
-        assert pl.Series([None, None, None], dtype_if_empty=pl.Int8).dtype == pl.Int8
-
-    # note: "== []" will be cast to empty Series with String dtype.
-    with pytest.deprecated_call():
-        s = pl.Series([], dtype_if_empty=pl.String) == []
-    assert_series_equal(s, pl.Series("", dtype=pl.Boolean))
-
-
 def test_init_structured_objects() -> None:
     # validate init from dataclass, namedtuple, and pydantic model objects
     from typing import NamedTuple
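Migration notes. `upsample` no longer takes `offset`; the deprecation message above points at chaining with `dt.offset_by`. A minimal sketch of that path, assuming a frame `df` with a sorted `"time"` column whose old call was `df.upsample(time_column="time", every="1mo", offset="1mo")`:

    import polars as pl

    # Upsample at the regular frequency first, then shift the generated
    # timestamps by what used to be the `offset` argument.
    out = df.upsample(time_column="time", every="1mo").with_columns(
        pl.col("time").dt.offset_by("1mo")
    )

This shifts every output timestamp rather than only the start of the generated range, so edge rows can differ from the old `offset` semantics; treat it as a sketch of the documented replacement, not a drop-in equivalent.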
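The `ignore_nulls` default for `ewm_mean`/`ewm_std`/`ewm_var` flips from `True` to `False`. A small illustration of the two weighting modes described in the docstrings, on a series with a null gap:

    import polars as pl

    s = pl.Series([1.0, None, 3.0])

    # New default: weights follow absolute positions, so the null leaves a gap.
    print(s.ewm_mean(com=1.0))  # ignore_nulls=False
    # The old behavior must now be requested explicitly.
    print(s.ewm_mean(com=1.0, ignore_nulls=True))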
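Zero-argument `pl.cum_count()` is removed; the replacement below is the one the removed deprecation message itself named, matching the old `UInt32` output:

    import polars as pl

    df = pl.DataFrame({"a": [5, 5, None]})

    df.select(
        pl.int_range(1, pl.len() + 1, dtype=pl.UInt32).alias("cum_count"),
        # reverse=True equivalent
        pl.int_range(pl.len(), 0, step=-1, dtype=pl.UInt32).alias("cum_count_reverse"),
    )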
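`dtype_if_empty` is dropped from the `Series` constructor, so empty or all-null input now always yields `Null` dtype unless `dtype` is passed. The removed docstring's own migration advice, sketched:

    import polars as pl

    s = pl.Series([None, None, None])
    # Previously: pl.Series([None, None, None], dtype_if_empty=pl.Int8)
    if s.dtype == pl.Null:
        s = s.cast(pl.Int8)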