Skip to content

Commit

Permalink
Apply WindowStatisticsTransform to regressors (#474)
Browse files Browse the repository at this point in the history
* update docstring

* update changelog

* fix tests with pipeline

* delete warning, fix changelog

* lints

---------

Co-authored-by: Egor Baturin <[email protected]>
  • Loading branch information
egoriyaa and Egor Baturin authored Sep 12, 2024
1 parent 7ad2e61 commit 0f98ec0
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 12 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
-
-
-
- Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474))
-
-
-
Expand Down
93 changes: 86 additions & 7 deletions etna/transforms/math/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ class MeanTransform(WindowStatisticsTransform):
.. math::
MeanTransform(x_t) = \\sum_{i=1}^{window}{x_{t - i}\\cdot\\alpha^{i - 1}}
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
Expand Down Expand Up @@ -235,6 +243,14 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]:
class StdTransform(WindowStatisticsTransform):
"""StdTransform computes std value for given window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
Notes
-----
Note that ``pd.Series([1]).std()`` is ``np.nan``.
Expand Down Expand Up @@ -293,7 +309,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:


class QuantileTransform(WindowStatisticsTransform):
"""QuantileTransform computes quantile value for given window."""
"""QuantileTransform computes quantile value for given window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
self,
Expand Down Expand Up @@ -367,7 +392,16 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]:


class MinTransform(WindowStatisticsTransform):
"""MinTransform computes min value for given window."""
"""MinTransform computes min value for given window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
self,
Expand Down Expand Up @@ -418,7 +452,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:


class MaxTransform(WindowStatisticsTransform):
"""MaxTransform computes max value for given window."""
"""MaxTransform computes max value for given window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
self,
Expand Down Expand Up @@ -469,7 +512,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:


class MedianTransform(WindowStatisticsTransform):
"""MedianTransform computes median value for given window."""
"""MedianTransform computes median value for given window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
self,
Expand Down Expand Up @@ -520,7 +572,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:


class MADTransform(WindowStatisticsTransform):
"""MADTransform computes Mean Absolute Deviation over the window."""
"""MADTransform computes Mean Absolute Deviation over the window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
self,
Expand Down Expand Up @@ -577,7 +638,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:


class MinMaxDifferenceTransform(WindowStatisticsTransform):
"""MinMaxDifferenceTransform computes difference between max and min values for given window."""
"""MinMaxDifferenceTransform computes difference between max and min values for given window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
self,
Expand Down Expand Up @@ -630,7 +700,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:


class SumTransform(WindowStatisticsTransform):
"""SumTransform computes sum of values over given window."""
"""SumTransform computes sum of values over given window.
Warning
-------
This transform, when applied to a non-regressor column, generates a non-regressor column.
Apply it to regressor columns to get regressor columns as output.
In the majority of cases you need to generate regressors so that they can be used in the future.
For example, apply this transform to target lags, not to target directly.
"""

def __init__(
self,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_commands/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ def pipeline_without_context(request):
@pytest.fixture
def pipeline_with_transforms():
transforms = [
LagTransform(in_column="target", lags=[14, 17]),
LagTransform(in_column="target", lags=[14, 17], out_column="lag"),
DifferencingTransform(in_column="target"),
MeanTransform(in_column="target", window=7),
MeanTransform(in_column="lag_14", window=7),
DensityOutliersTransform(in_column="target"),
]

Expand Down
4 changes: 2 additions & 2 deletions tests/test_pipeline/test_hierarchical_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,9 @@ def test_backtest_w_transforms(market_level_constant_hierarchical_ts, reconcilia
ts = market_level_constant_hierarchical_ts
model = LinearPerSegmentModel()
transforms = [
MeanTransform(in_column="target", window=2),
LagTransform(in_column="target", lags=[1], out_column="lag"),
MeanTransform(in_column="lag_1", window=2),
LinearTrendTransform(in_column="target"),
LagTransform(in_column="target", lags=[1]),
]
pipeline = HierarchicalPipeline(reconciliator=reconciliator, model=model, transforms=transforms, horizon=1)
metrics, _, _ = pipeline.backtest(ts=ts, metrics=[MAE()], n_folds=2, aggregate_metrics=True)
Expand Down

0 comments on commit 0f98ec0

Please sign in to comment.