From 0f98ec07865235683a1a7d537d4e3ceeff9fba70 Mon Sep 17 00:00:00 2001 From: Egor Baturin <82458209+egoriyaa@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:02:20 +0300 Subject: [PATCH] Apply WindowStatisticsTransform to regressors (#474) * update docstring * update changelog * fix tests with pipeline * delete warning, fix changelog * lints --------- Co-authored-by: Egor Baturin --- CHANGELOG.md | 2 +- etna/transforms/math/statistics.py | 93 +++++++++++++++++-- tests/test_commands/test_utils.py | 4 +- .../test_hierarchical_pipeline.py | 4 +- 4 files changed, 91 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8a5d004c..b298a8f4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - - -- +- Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474)) - - - diff --git a/etna/transforms/math/statistics.py b/etna/transforms/math/statistics.py index e55daa068..e292dd0a5 100644 --- a/etna/transforms/math/statistics.py +++ b/etna/transforms/math/statistics.py @@ -136,6 +136,14 @@ class MeanTransform(WindowStatisticsTransform): .. math:: MeanTransform(x_t) = \\sum_{i=1}^{window}{x_{t - i}\\cdot\\alpha^{i - 1}} + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. """ def __init__( @@ -235,6 +243,14 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]: class StdTransform(WindowStatisticsTransform): """StdTransform computes std value for given window. + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. 
+ Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. + Notes ----- Note that ``pd.Series([1]).std()`` is ``np.nan``. @@ -293,7 +309,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray: class QuantileTransform(WindowStatisticsTransform): - """QuantileTransform computes quantile value for given window.""" + """QuantileTransform computes quantile value for given window. + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. + """ def __init__( self, @@ -367,7 +392,16 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]: class MinTransform(WindowStatisticsTransform): - """MinTransform computes min value for given window.""" + """MinTransform computes min value for given window. + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. + """ def __init__( self, @@ -418,7 +452,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray: class MaxTransform(WindowStatisticsTransform): - """MaxTransform computes max value for given window.""" + """MaxTransform computes max value for given window. + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. 
+ + For example, apply this transform to target lags, not to target directly. + """ def __init__( self, @@ -469,7 +512,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray: class MedianTransform(WindowStatisticsTransform): - """MedianTransform computes median value for given window.""" + """MedianTransform computes median value for given window. + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. + """ def __init__( self, @@ -520,7 +572,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray: class MADTransform(WindowStatisticsTransform): - """MADTransform computes Mean Absolute Deviation over the window.""" + """MADTransform computes Mean Absolute Deviation over the window. + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. + """ def __init__( self, @@ -577,7 +638,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray: class MinMaxDifferenceTransform(WindowStatisticsTransform): - """MinMaxDifferenceTransform computes difference between max and min values for given window.""" + """MinMaxDifferenceTransform computes difference between max and min values for given window. + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. 
+ """ def __init__( self, @@ -630,7 +700,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray: class SumTransform(WindowStatisticsTransform): - """SumTransform computes sum of values over given window.""" + """SumTransform computes sum of values over given window. + + Warning + ------- + This transform, applied to non-regressor column, generates non-regressor column. + Apply it to regressor columns to get regressor columns too. + In the majority of cases you need to generate regressor to use them in the future. + + For example, apply this transform to target lags, not to target directly. + """ def __init__( self, diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py index 2545d72ad..52ac4ad7e 100644 --- a/tests/test_commands/test_utils.py +++ b/tests/test_commands/test_utils.py @@ -62,9 +62,9 @@ def pipeline_without_context(request): @pytest.fixture def pipeline_with_transforms(): transforms = [ - LagTransform(in_column="target", lags=[14, 17]), + LagTransform(in_column="target", lags=[14, 17], out_column="lag"), DifferencingTransform(in_column="target"), - MeanTransform(in_column="target", window=7), + MeanTransform(in_column="lag_14", window=7), DensityOutliersTransform(in_column="target"), ] diff --git a/tests/test_pipeline/test_hierarchical_pipeline.py b/tests/test_pipeline/test_hierarchical_pipeline.py index 88bfa4e43..2d457fb89 100644 --- a/tests/test_pipeline/test_hierarchical_pipeline.py +++ b/tests/test_pipeline/test_hierarchical_pipeline.py @@ -301,9 +301,9 @@ def test_backtest_w_transforms(market_level_constant_hierarchical_ts, reconcilia ts = market_level_constant_hierarchical_ts model = LinearPerSegmentModel() transforms = [ - MeanTransform(in_column="target", window=2), + LagTransform(in_column="target", lags=[1], out_column="lag"), + MeanTransform(in_column="lag_1", window=2), LinearTrendTransform(in_column="target"), - LagTransform(in_column="target", lags=[1]), ] pipeline = 
HierarchicalPipeline(reconciliator=reconciliator, model=model, transforms=transforms, horizon=1) metrics, _, _ = pipeline.backtest(ts=ts, metrics=[MAE()], n_folds=2, aggregate_metrics=True)