etna-team · egoriyaa · Sep 12, 2024 · Sep 9, 2024 · Sep 9, 2024 · Sep 11, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,7 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed
 - 
 - 
-- 
+- Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474))
 - 
 - 
 - 

diff --git a/etna/transforms/math/statistics.py b/etna/transforms/math/statistics.py
@@ -136,6 +136,14 @@ class MeanTransform(WindowStatisticsTransform):
 
     .. math::
        MeanTransform(x_t) = \\sum_{i=1}^{window}{x_{t - i}\\cdot\\alpha^{i - 1}}
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
     """
 
     def __init__(
@@ -235,6 +243,14 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]:
 class StdTransform(WindowStatisticsTransform):
     """StdTransform computes std value for given window.
 
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+
     Notes
     -----
     Note that ``pd.Series([1]).std()`` is ``np.nan``.
@@ -293,7 +309,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
 
 
 class QuantileTransform(WindowStatisticsTransform):
-    """QuantileTransform computes quantile value for given window."""
+    """QuantileTransform computes quantile value for given window.
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+    """
 
     def __init__(
         self,
@@ -367,7 +392,16 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]:
 
 
 class MinTransform(WindowStatisticsTransform):
-    """MinTransform computes min value for given window."""
+    """MinTransform computes min value for given window.
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+    """
 
     def __init__(
         self,
@@ -418,7 +452,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
 
 
 class MaxTransform(WindowStatisticsTransform):
-    """MaxTransform computes max value for given window."""
+    """MaxTransform computes max value for given window.
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+    """
 
     def __init__(
         self,
@@ -469,7 +512,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
 
 
 class MedianTransform(WindowStatisticsTransform):
-    """MedianTransform computes median value for given window."""
+    """MedianTransform computes median value for given window.
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+    """
 
     def __init__(
         self,
@@ -520,7 +572,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
 
 
 class MADTransform(WindowStatisticsTransform):
-    """MADTransform computes Mean Absolute Deviation over the window."""
+    """MADTransform computes Mean Absolute Deviation over the window.
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+    """
 
     def __init__(
         self,
@@ -577,7 +638,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
 
 
 class MinMaxDifferenceTransform(WindowStatisticsTransform):
-    """MinMaxDifferenceTransform computes difference between max and min values for given window."""
+    """MinMaxDifferenceTransform computes difference between max and min values for given window.
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+    """
 
     def __init__(
         self,
@@ -630,7 +700,16 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
 
 
 class SumTransform(WindowStatisticsTransform):
-    """SumTransform computes sum of values over given window."""
+    """SumTransform computes sum of values over given window.
+
+    Warning
+    -------
+    This transform, when applied to a non-regressor column, generates a non-regressor column.
+    Apply it to regressor columns to get regressor columns too.
+    In the majority of cases you need to generate regressors so that they can be used in the future.
+
+    For example, apply this transform to target lags, not to target directly.
+    """
 
     def __init__(
         self,

diff --git a/tests/test_commands/test_utils.py b/tests/test_commands/test_utils.py
@@ -62,9 +62,9 @@ def pipeline_without_context(request):
 @pytest.fixture
 def pipeline_with_transforms():
     transforms = [
-        LagTransform(in_column="target", lags=[14, 17]),
+        LagTransform(in_column="target", lags=[14, 17], out_column="lag"),
         DifferencingTransform(in_column="target"),
-        MeanTransform(in_column="target", window=7),
+        MeanTransform(in_column="lag_14", window=7),
         DensityOutliersTransform(in_column="target"),
     ]
 

diff --git a/tests/test_pipeline/test_hierarchical_pipeline.py b/tests/test_pipeline/test_hierarchical_pipeline.py
@@ -301,9 +301,9 @@ def test_backtest_w_transforms(market_level_constant_hierarchical_ts, reconcilia
     ts = market_level_constant_hierarchical_ts
     model = LinearPerSegmentModel()
     transforms = [
-        MeanTransform(in_column="target", window=2),
+        LagTransform(in_column="target", lags=[1], out_column="lag"),
+        MeanTransform(in_column="lag_1", window=2),
         LinearTrendTransform(in_column="target"),
-        LagTransform(in_column="target", lags=[1]),
     ]
     pipeline = HierarchicalPipeline(reconciliator=reconciliator, model=model, transforms=transforms, horizon=1)
     metrics, _, _ = pipeline.backtest(ts=ts, metrics=[MAE()], n_folds=2, aggregate_metrics=True)