Skip to content

Commit

Permalink
feat!: Expedited removal of certain deprecated functionality (pola-rs…
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored and Wouittone committed Jun 22, 2024
1 parent cfbbd57 commit fc4ac10
Show file tree
Hide file tree
Showing 12 changed files with 250 additions and 446 deletions.
27 changes: 7 additions & 20 deletions crates/polars-time/src/upsample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ pub trait PolarsUpsample {
by: I,
time_column: &str,
every: Duration,
offset: Duration,
) -> PolarsResult<DataFrame>;

/// Upsample a [`DataFrame`] at a regular frequency.
Expand Down Expand Up @@ -85,7 +84,6 @@ pub trait PolarsUpsample {
by: I,
time_column: &str,
every: Duration,
offset: Duration,
) -> PolarsResult<DataFrame>;
}

Expand All @@ -95,27 +93,23 @@ impl PolarsUpsample for DataFrame {
by: I,
time_column: &str,
every: Duration,
offset: Duration,
) -> PolarsResult<DataFrame> {
let by = by.into_vec();
let time_type = self.column(time_column)?.dtype();
ensure_duration_matches_data_type(offset, time_type, "offset")?;
ensure_duration_matches_data_type(every, time_type, "every")?;
upsample_impl(self, by, time_column, every, offset, false)
upsample_impl(self, by, time_column, every, false)
}

fn upsample_stable<I: IntoVec<String>>(
&self,
by: I,
time_column: &str,
every: Duration,
offset: Duration,
) -> PolarsResult<DataFrame> {
let by = by.into_vec();
let time_type = self.column(time_column)?.dtype();
ensure_duration_matches_data_type(offset, time_type, "offset")?;
ensure_duration_matches_data_type(every, time_type, "every")?;
upsample_impl(self, by, time_column, every, offset, true)
upsample_impl(self, by, time_column, every, true)
}
}

Expand All @@ -124,7 +118,6 @@ fn upsample_impl(
by: Vec<String>,
index_column: &str,
every: Duration,
offset: Duration,
stable: bool,
) -> PolarsResult<DataFrame> {
let s = source.column(index_column)?;
Expand All @@ -137,7 +130,7 @@ fn upsample_impl(
.unwrap()
})
.unwrap();
let mut out = upsample_impl(&df, by, index_column, every, offset, stable)?;
let mut out = upsample_impl(&df, by, index_column, every, stable)?;
out.apply(index_column, |s| s.cast(time_type).unwrap())
.unwrap();
Ok(out)
Expand All @@ -154,7 +147,7 @@ fn upsample_impl(
.unwrap()
})
.unwrap();
let mut out = upsample_impl(&df, by, index_column, every, offset, stable)?;
let mut out = upsample_impl(&df, by, index_column, every, stable)?;
out.apply(index_column, |s| s.cast(time_type).unwrap())
.unwrap();
Ok(out)
Expand All @@ -165,13 +158,13 @@ fn upsample_impl(
.unwrap()
})
.unwrap();
let mut out = upsample_impl(&df, by, index_column, every, offset, stable)?;
let mut out = upsample_impl(&df, by, index_column, every, stable)?;
out.apply(index_column, |s| s.cast(time_type).unwrap())
.unwrap();
Ok(out)
} else if by.is_empty() {
let index_column = source.column(index_column)?;
upsample_single_impl(source, index_column, every, offset)
upsample_single_impl(source, index_column, every)
} else {
let gb = if stable {
source.group_by_stable(by)
Expand All @@ -181,7 +174,7 @@ fn upsample_impl(
// don't parallelize this, this may SO on large data.
gb?.apply(|df| {
let index_column = df.column(index_column)?;
upsample_single_impl(&df, index_column, every, offset)
upsample_single_impl(&df, index_column, every)
})
}
}
Expand All @@ -190,7 +183,6 @@ fn upsample_single_impl(
source: &DataFrame,
index_column: &Series,
every: Duration,
offset: Duration,
) -> PolarsResult<DataFrame> {
let index_col_name = index_column.name();

Expand All @@ -208,11 +200,6 @@ fn upsample_single_impl(
Some(tz) => Some(parse_time_zone(tz)?),
_ => None,
};
let first = match tu {
TimeUnit::Nanoseconds => offset.add_ns(first, tz.as_ref())?,
TimeUnit::Microseconds => offset.add_us(first, tz.as_ref())?,
TimeUnit::Milliseconds => offset.add_ms(first, tz.as_ref())?,
};
let range = datetime_range_impl(
index_col_name,
first,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// --8<-- [start:upsample]
let out1 = df
.clone()
.upsample::<[String; 0]>([], "time", Duration::parse("15m"), Duration::parse("0"))?
.upsample::<[String; 0]>([], "time", Duration::parse("15m"))?
.fill_null(FillNullStrategy::Forward(None))?;
println!("{}", &out1);
// --8<-- [end:upsample]

// --8<-- [start:upsample2]
let out2 = df
.clone()
.upsample::<[String; 0]>([], "time", Duration::parse("15m"), Duration::parse("0"))?
.upsample::<[String; 0]>([], "time", Duration::parse("15m"))?
.lazy()
.with_columns([col("values").interpolate(InterpolationMethod::Linear)])
.collect()?
Expand Down
21 changes: 2 additions & 19 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6003,15 +6003,13 @@ def upsample(
time_column: str,
*,
every: str | timedelta,
offset: str | timedelta | None = None,
group_by: str | Sequence[str] | None = None,
maintain_order: bool = False,
) -> Self:
"""
Upsample a DataFrame at a regular frequency.
The `every` and `offset` arguments are created with
the following string language:
The `every` argument is created with the following string language:
- 1ns (1 nanosecond)
- 1us (1 microsecond)
Expand Down Expand Up @@ -6042,12 +6040,6 @@ def upsample(
Note that this column has to be sorted for the output to make sense.
every
Interval will start 'every' duration.
offset
Change the start of the date_range by this offset.
.. deprecated:: 0.20.19
This argument is deprecated and will be removed in the next breaking
release. Instead, chain `upsample` with `dt.offset_by`.
group_by
First group by these columns and then upsample for every group.
maintain_order
Expand Down Expand Up @@ -6094,24 +6086,15 @@ def upsample(
│ 2021-06-01 00:00:00 ┆ B ┆ 3 │
└─────────────────────┴────────┴────────┘
"""
if offset is not None:
issue_deprecation_warning(
"`offset` is deprecated and will be removed in the next breaking release. "
"Instead, chain `upsample` with `dt.offset_by`.",
version="0.20.19",
)
if group_by is None:
group_by = []
if isinstance(group_by, str):
group_by = [group_by]
if offset is None:
offset = "0ns"

every = parse_as_duration_string(every)
offset = parse_as_duration_string(offset)

return self._from_pydf(
self._df.upsample(group_by, time_column, every, offset, maintain_order)
self._df.upsample(group_by, time_column, every, maintain_order)
)

def join_asof(
Expand Down
48 changes: 9 additions & 39 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9175,7 +9175,7 @@ def ewm_mean(
alpha: float | None = None,
adjust: bool = True,
min_periods: int = 1,
ignore_nulls: bool | None = None,
ignore_nulls: bool = False,
) -> Self:
r"""
Exponentially-weighted moving average.
Expand Down Expand Up @@ -9218,15 +9218,15 @@ def ewm_mean(
ignore_nulls
Ignore missing values when calculating weights.
- When `ignore_nulls=False`, weights are based on absolute
- When `ignore_nulls=False` (default), weights are based on absolute
positions.
For example, the weights of :math:`x_0` and :math:`x_2` used in
calculating the final weighted average of
[:math:`x_0`, None, :math:`x_2`] are
:math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and
:math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`.
- When `ignore_nulls=True` (current default), weights are based
- When `ignore_nulls=True`, weights are based
on relative positions. For example, the weights of
:math:`x_0` and :math:`x_2` used in calculating the final weighted
average of [:math:`x_0`, None, :math:`x_2`] are
Expand All @@ -9248,16 +9248,6 @@ def ewm_mean(
│ 2.428571 │
└──────────┘
"""
if ignore_nulls is None:
issue_deprecation_warning(
"The default value for `ignore_nulls` for `ewm` methods"
" will change from True to False in the next breaking release."
" Explicitly set `ignore_nulls=True` to keep the existing behavior"
" and silence this warning.",
version="0.20.11",
)
ignore_nulls = True

alpha = _prepare_alpha(com, span, half_life, alpha)
return self._from_pyexpr(
self._pyexpr.ewm_mean(alpha, adjust, min_periods, ignore_nulls)
Expand Down Expand Up @@ -9377,7 +9367,7 @@ def ewm_std(
adjust: bool = True,
bias: bool = False,
min_periods: int = 1,
ignore_nulls: bool | None = None,
ignore_nulls: bool = False,
) -> Self:
r"""
Exponentially-weighted moving standard deviation.
Expand Down Expand Up @@ -9423,15 +9413,15 @@ def ewm_std(
ignore_nulls
Ignore missing values when calculating weights.
- When `ignore_nulls=False`, weights are based on absolute
- When `ignore_nulls=False` (default), weights are based on absolute
positions.
For example, the weights of :math:`x_0` and :math:`x_2` used in
calculating the final weighted average of
[:math:`x_0`, None, :math:`x_2`] are
:math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and
:math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`.
- When `ignore_nulls=True` (current default), weights are based
- When `ignore_nulls=True`, weights are based
on relative positions. For example, the weights of
:math:`x_0` and :math:`x_2` used in calculating the final weighted
average of [:math:`x_0`, None, :math:`x_2`] are
Expand All @@ -9453,16 +9443,6 @@ def ewm_std(
│ 0.963624 │
└──────────┘
"""
if ignore_nulls is None:
issue_deprecation_warning(
"The default value for `ignore_nulls` for `ewm` methods"
" will change from True to False in the next breaking release."
" Explicitly set `ignore_nulls=True` to keep the existing behavior"
" and silence this warning.",
version="0.20.11",
)
ignore_nulls = True

alpha = _prepare_alpha(com, span, half_life, alpha)
return self._from_pyexpr(
self._pyexpr.ewm_std(alpha, adjust, bias, min_periods, ignore_nulls)
Expand All @@ -9478,7 +9458,7 @@ def ewm_var(
adjust: bool = True,
bias: bool = False,
min_periods: int = 1,
ignore_nulls: bool | None = None,
ignore_nulls: bool = False,
) -> Self:
r"""
Exponentially-weighted moving variance.
Expand Down Expand Up @@ -9524,15 +9504,15 @@ def ewm_var(
ignore_nulls
Ignore missing values when calculating weights.
- When `ignore_nulls=False`, weights are based on absolute
- When `ignore_nulls=False` (default), weights are based on absolute
positions.
For example, the weights of :math:`x_0` and :math:`x_2` used in
calculating the final weighted average of
[:math:`x_0`, None, :math:`x_2`] are
:math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and
:math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`.
- When `ignore_nulls=True` (current default), weights are based
- When `ignore_nulls=True`, weights are based
on relative positions. For example, the weights of
:math:`x_0` and :math:`x_2` used in calculating the final weighted
average of [:math:`x_0`, None, :math:`x_2`] are
Expand All @@ -9554,16 +9534,6 @@ def ewm_var(
│ 0.928571 │
└──────────┘
"""
if ignore_nulls is None:
issue_deprecation_warning(
"The default value for `ignore_nulls` for `ewm` methods"
" will change from True to False in the next breaking release."
" Explicitly set `ignore_nulls=True` to keep the existing behavior"
" and silence this warning.",
version="0.20.11",
)
ignore_nulls = True

alpha = _prepare_alpha(com, span, half_life, alpha)
return self._from_pyexpr(
self._pyexpr.ewm_var(alpha, adjust, bias, min_periods, ignore_nulls)
Expand Down
13 changes: 1 addition & 12 deletions py-polars/polars/functions/lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from polars._utils.unstable import issue_unstable_warning, unstable
from polars._utils.various import extend_bool
from polars._utils.wrap import wrap_df, wrap_expr
from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64, UInt32
from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64

with contextlib.suppress(ImportError): # Module not available when building docs
import polars.polars as plr
Expand Down Expand Up @@ -209,17 +209,6 @@ def cum_count(*columns: str, reverse: bool = False) -> Expr:
│ 2 │
└─────┘
"""
if not columns:
issue_deprecation_warning(
"`pl.cum_count()` is deprecated. The same result can be achieved using"
" `pl.int_range(1, pl.len() + 1, dtype=pl.UInt32)`,"
" or `int_range(pl.len(), 0, -1, dtype=pl.UInt32)` when `reverse=True`.",
version="0.20.5",
)
if reverse:
return F.int_range(F.len(), 0, step=-1, dtype=UInt32).alias("cum_count")
else:
return F.int_range(1, F.len() + 1, dtype=UInt32).alias("cum_count")
return F.col(*columns).cum_count(reverse=reverse)


Expand Down
Loading

0 comments on commit fc4ac10

Please sign in to comment.