diff --git a/CHANGELOG.md b/CHANGELOG.md
index 11210d737..7e07474c0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add `MADOutlierTransform` class for anomaly detection ([#415](https://github.com/etna-team/etna/pull/415))
 - Add `MeanEncoderTransform` ([#413](https://github.com/etna-team/etna/pull/413))
 - Add `FourierDecomposeTransform` transform for series decomposition using DFT ([#430](https://github.com/etna-team/etna/pull/430))
+- Add `ModelDecomposeTransform` transform for series decomposition using ETNA models ([#427](https://github.com/etna-team/etna/pull/427))
+-
 
 ### Changed
 - Allow to change `device`, `batch_size` and `num_workers` of embedding models ([#396](https://github.com/etna-team/etna/pull/396))
diff --git a/docs/source/api_reference/transforms.rst b/docs/source/api_reference/transforms.rst
index 55a146690..c8ac13be6 100644
--- a/docs/source/api_reference/transforms.rst
+++ b/docs/source/api_reference/transforms.rst
@@ -38,13 +38,14 @@ Decomposition transforms and their utilities:
     STLTransform
     TheilSenTrendTransform
     TrendTransform
+    FourierDecomposeTransform
+    ModelDecomposeTransform
     decomposition.RupturesChangePointsModel
     decomposition.StatisticsPerIntervalModel
     decomposition.MeanPerIntervalModel
     decomposition.MedianPerIntervalModel
     decomposition.SklearnPreprocessingPerIntervalModel
     decomposition.SklearnRegressionPerIntervalModel
-    decomposition.FourierDecomposeTransform
 
 Categorical encoding transforms:
 
diff --git a/etna/models/deadline_ma.py b/etna/models/deadline_ma.py
index d733d0583..e148b76dd 100644
--- a/etna/models/deadline_ma.py
+++ b/etna/models/deadline_ma.py
@@ -331,7 +331,7 @@ def _predict(
             raise ValueError("There are NaNs in a target column, predict method requires target to be filled!")
 
         num_segments = context.shape[1]
-        index = pd.date_range(start=df.index[-prediction_size], end=df.index[-1], freq=self._freq)
+        index = pd.date_range(start=df.index[-prediction_size], end=df.index[-1], freq=self._freq, name=df.index.name)
         result_template = pd.DataFrame(np.zeros((prediction_size, num_segments)), index=index, columns=context.columns)
         result_values = self._make_predictions(
             result_template=result_template, context=context, prediction_size=prediction_size
diff --git a/etna/models/mixins.py b/etna/models/mixins.py
index d41eb2401..b404ba51d 100644
--- a/etna/models/mixins.py
+++ b/etna/models/mixins.py
@@ -486,7 +486,14 @@ def _make_component_predictions(self, ts: TSDataset, prediction_method: Callable
         """
         features_df = ts.to_pandas()
         result_list = list()
-        for segment, model in self._get_model().items():
+
+        models = self._get_model()
+
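+        # iterate over the dataset segments rather than the fitted models so that
+        # segments unseen during `fit` are reported explicitly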
+        for segment in ts.segments:
+            if segment not in models:
+                raise NotImplementedError("Per-segment models can't estimate prediction components on new segments!")
+
+            model = models[segment]
             segment_predict = self._make_predictions_segment(
                 model=model, segment=segment, df=features_df, prediction_method=prediction_method, **kwargs
             )
diff --git a/etna/transforms/__init__.py b/etna/transforms/__init__.py
index 44c536bac..8710419ce 100644
--- a/etna/transforms/__init__.py
+++ b/etna/transforms/__init__.py
@@ -15,6 +15,7 @@
 from etna.transforms.decomposition import FourierDecomposeTransform
 from etna.transforms.decomposition import IrreversibleChangePointsTransform
 from etna.transforms.decomposition import LinearTrendTransform
+from etna.transforms.decomposition import ModelDecomposeTransform
 from etna.transforms.decomposition import ReversibleChangePointsTransform
 from etna.transforms.decomposition import STLTransform
 from etna.transforms.decomposition import TheilSenTrendTransform
diff --git a/etna/transforms/decomposition/__init__.py b/etna/transforms/decomposition/__init__.py
index ae2558af6..37f7b56b4 100644
--- a/etna/transforms/decomposition/__init__.py
+++ b/etna/transforms/decomposition/__init__.py
@@ -17,4 +17,5 @@
 from etna.transforms.decomposition.detrend import LinearTrendTransform
 from etna.transforms.decomposition.detrend import TheilSenTrendTransform
 from etna.transforms.decomposition.dft_based import FourierDecomposeTransform
+from etna.transforms.decomposition.model_based import ModelDecomposeTransform
 from etna.transforms.decomposition.stl import STLTransform
diff --git a/etna/transforms/decomposition/model_based.py b/etna/transforms/decomposition/model_based.py
new file mode 100644
index 000000000..00804ff35
--- /dev/null
+++ b/etna/transforms/decomposition/model_based.py
@@ -0,0 +1,190 @@
+from typing import List
+from typing import Union
+from typing import get_args
+
+import pandas as pd
+
+from etna.datasets import TSDataset
+from etna.datasets.utils import determine_num_steps
+from etna.models import BATSModel
+from etna.models import DeadlineMovingAverageModel
+from etna.models import HoltWintersModel
+from etna.models import ProphetModel
+from etna.models import SARIMAXModel
+from etna.models import SeasonalMovingAverageModel
+from etna.models import TBATSModel
+from etna.models.base import ContextRequiredModelType
+from etna.models.base import ModelType
+from etna.transforms import IrreversibleTransform
+
+_SUPPORTED_MODELS = Union[
+    HoltWintersModel,  # full
+    ProphetModel,  # full
+    SARIMAXModel,  # full
+    DeadlineMovingAverageModel,  # need to account for context/prediction size
+    SeasonalMovingAverageModel,  # need to account for context/prediction size
+    BATSModel,  # dynamic components, not reliable
+    TBATSModel,  # dynamic components, not reliable
+]
+
+
+class ModelDecomposeTransform(IrreversibleTransform):
+    """Transform that uses ETNA models to estimate series decomposition.
+
+    Note
+    ----
+    This transform decomposes only in-sample data. For future timestamps it produces ``NaN``.
+    The dataset to be transformed should contain at least the minimum number of in-sample timestamps required by the model.
+    """
+
+    def __init__(self, model: ModelType, in_column: str = "target", residuals: bool = False):
+        """Init ``ModelDecomposeTransform``.
+
+        Parameters
+        ----------
+        model:
+            instance of the model to use for the decomposition. Note that not all models are supported. Possible selections are:
+
+            - ``HoltWintersModel``
+            - ``ProphetModel``
+            - ``SARIMAXModel``
+            - ``DeadlineMovingAverageModel``
+            - ``SeasonalMovingAverageModel``
+            - ``BATSModel``
+            - ``TBATSModel``
+
+            Currently, only the specified series itself is used for model fitting. There is no way to add additional features/regressors to the decomposition model.
+
+        in_column:
+            name of the processed column.
+        residuals:
+            whether to add residuals after decomposition. This guarantees that all components, including residuals, sum up to the series.
+
+        Warning
+        -------
+        Using :py:class:`etna.models.BATSModel` or :py:class:`etna.models.TBATSModel` as ``model`` may result in a different set of components than the initialization parameters suggest.
+        In such a case, a corresponding warning will be raised.
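+
+        Examples
+        --------
+        A minimal usage sketch. With ``HoltWintersModel`` and ``residuals=True`` the
+        decomposition is expected to add ``target_level`` and ``target_residuals``
+        columns to the dataset:
+
+        >>> from etna.datasets import TSDataset
+        >>> from etna.datasets import generate_ar_df
+        >>> from etna.models import HoltWintersModel
+        >>> from etna.transforms import ModelDecomposeTransform
+        >>> df = generate_ar_df(start_time="2020-01-01", periods=100, freq="D", n_segments=2)
+        >>> ts = TSDataset(df, freq="D")
+        >>> transform = ModelDecomposeTransform(model=HoltWintersModel(), residuals=True)
+        >>> ts = transform.fit_transform(ts=ts)  # doctest: +SKIP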
+ """ + if not isinstance(model, get_args(_SUPPORTED_MODELS)): + raise ValueError( + f"Model type `{type(model).__name__}` is not supported! Supported models are: {_SUPPORTED_MODELS}" + ) + + self.model = model + self.in_column = in_column + self.residuals = residuals + + self._first_timestamp = None + self._last_timestamp = None + + super().__init__(required_features=[in_column]) + + def get_regressors_info(self) -> List[str]: + """Return the list with regressors created by the transform.""" + return [] + + def _fit(self, df: pd.DataFrame): + """Fit transform with the dataframe.""" + pass + + def _transform(self, df: pd.DataFrame) -> pd.DataFrame: + """Transform provided dataframe.""" + pass + + def _prepare_ts(self, ts: TSDataset) -> TSDataset: + """Prepare dataset for the decomposition model.""" + if self.in_column not in ts.features: + raise KeyError(f"Column {self.in_column} is not found in features!") + + df = ts.df.loc[:, pd.IndexSlice[:, self.in_column]] + df = df.rename(columns={self.in_column: "target"}, level="feature") + + return TSDataset(df=df, freq=ts.freq) + + def fit(self, ts: TSDataset) -> "ModelDecomposeTransform": + """Fit the transform and the decomposition model. + + Parameters + ---------- + ts: + dataset to fit the transform on. + + Returns + ------- + : + the fitted transform instance. + """ + self._first_timestamp = ts.index.min() + self._last_timestamp = ts.index.max() + + ts = self._prepare_ts(ts=ts) + + self.model.fit(ts) + return self + + def transform(self, ts: TSDataset) -> TSDataset: + """Transform ``TSDataset`` inplace. + + Parameters + ---------- + ts: + Dataset to transform. + + Returns + ------- + : + Transformed ``TSDataset``. + """ + if self._first_timestamp is None: + raise ValueError("Transform is not fitted!") + + if ts.index.min() < self._first_timestamp: + raise ValueError( + f"First index of the dataset to be transformed must be larger or equal than {self._first_timestamp}!" 
+        future_steps = 0
+        ts_max_timestamp = decompose_ts.index.max()
+        if ts_max_timestamp > self._last_timestamp:
+            future_steps = determine_num_steps(self._last_timestamp, ts_max_timestamp, freq=decompose_ts.freq)
+            decompose_ts.df = decompose_ts.df.loc[: self._last_timestamp]
+
+        target = decompose_ts[..., "target"].droplevel("feature", axis=1)
+
+        if isinstance(self.model, get_args(ContextRequiredModelType)):
+            decompose_ts = self.model.predict(
+                decompose_ts, prediction_size=decompose_ts.size()[0] - self.model.context_size, return_components=True
+            )
+
+        else:
+            decompose_ts = self.model.predict(decompose_ts, return_components=True)
+
+        components_df = decompose_ts[..., decompose_ts.target_components_names]
+
+        components_names = [x.replace("target_component", self.in_column) for x in decompose_ts.target_components_names]
+
+        rename = dict(zip(decompose_ts.target_components_names, components_names))
+
+        if self.residuals:
+            components_sum = components_df.groupby(level="segment", axis=1).sum()
+            for segment in ts.segments:
+                components_df[segment, f"{self.in_column}_residuals"] = target[segment] - components_sum[segment]
+
+        components_df.rename(columns=rename, level="feature", inplace=True)
+
+        if future_steps > 0:
+            components_df = TSDataset._expand_index(df=components_df, future_steps=future_steps, freq=decompose_ts.freq)
+
+        ts.add_columns_from_pandas(components_df)
+
+        return ts
+
+
+__all__ = ["ModelDecomposeTransform"]
diff --git a/tests/test_transforms/test_decomposition/conftest.py b/tests/test_transforms/test_decomposition/conftest.py
new file mode 100644
index 000000000..821a1e40f
--- /dev/null
+++ b/tests/test_transforms/test_decomposition/conftest.py
@@ -0,0 +1,34 @@
+import numpy as np
+import pytest
+
+from etna.datasets import TSDataset
+from etna.datasets import generate_ar_df
+
+
+@pytest.fixture()
+def ts_with_exogs() -> TSDataset:
+    periods = 100
+    periods_exog = periods + 10
+    df = generate_ar_df(start_time="2020-01-01", periods=periods, freq="D", n_segments=2)
+    df_exog = generate_ar_df(start_time="2020-01-01", periods=periods_exog, freq="D", n_segments=2, random_seed=2)
+    df_exog.rename(columns={"target": "exog"}, inplace=True)
+    df_exog["holiday"] = np.random.choice([0, 1], size=periods_exog * 2)
+
+    ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all")
+    return ts
+
+
+@pytest.fixture()
+def ts_with_exogs_train_test(ts_with_exogs):
+    return ts_with_exogs.train_test_split(test_size=20)
+
+
+@pytest.fixture()
+def forward_stride_datasets(ts_with_exogs):
+    train_df = ts_with_exogs.df.iloc[:-10]
+    test_df = ts_with_exogs.df.iloc[-20:]
+
+    train_ts = TSDataset(df=train_df, freq=ts_with_exogs.freq)
+    test_ts = TSDataset(df=test_df, freq=ts_with_exogs.freq)
+
+    return train_ts, test_ts
diff --git a/tests/test_transforms/test_decomposition/test_dft_based.py b/tests/test_transforms/test_decomposition/test_dft_based.py
index e773d02b5..274d28a4a 100644
--- a/tests/test_transforms/test_decomposition/test_dft_based.py
+++ b/tests/test_transforms/test_decomposition/test_dft_based.py
@@ -3,7 +3,6 @@
 import pytest
 
 from etna.datasets import TSDataset
-from etna.datasets import generate_ar_df
 from etna.metrics import MAE
 from etna.models import CatBoostPerSegmentModel
 from etna.models import HoltWintersModel
@@ -23,35 +22,6 @@ def simple_pipeline_with_decompose(in_column, horizon, k):
     return pipeline
 
 
-@pytest.fixture()
-def ts_with_exogs() -> TSDataset:
-    periods = 100
-    periods_exog = periods + 10
-    df = generate_ar_df(start_time="2020-01-01", periods=periods, freq="D", n_segments=2)
-    df_exog = generate_ar_df(start_time="2020-01-01", periods=periods_exog, freq="D", n_segments=2, random_seed=2)
-    df_exog.rename(columns={"target": "exog"}, inplace=True)
-    df_exog["holiday"] = np.random.choice([0, 1], size=periods_exog * 2)
-
-    ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all")
-    return ts
-
-
-@pytest.fixture()
-def ts_with_exogs_train_test(ts_with_exogs):
-    return ts_with_exogs.train_test_split(test_size=20)
-
-
-@pytest.fixture()
-def forward_stride_datasets(ts_with_exogs):
-    train_df = ts_with_exogs.df.iloc[:-10]
-    test_df = ts_with_exogs.df.iloc[-20:]
-
-    train_ts = TSDataset(df=train_df, freq=ts_with_exogs.freq)
-    test_ts = TSDataset(df=test_df, freq=ts_with_exogs.freq)
-
-    return train_ts, test_ts
-
-
 @pytest.fixture()
 def ts_with_missing(ts_with_exogs):
     target_df = ts_with_exogs[..., "target"]
diff --git a/tests/test_transforms/test_decomposition/test_model_based.py b/tests/test_transforms/test_decomposition/test_model_based.py
new file mode 100644
index 000000000..fed86d28f
--- /dev/null
+++ b/tests/test_transforms/test_decomposition/test_model_based.py
@@ -0,0 +1,334 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from etna.metrics import MAE
+from etna.models import BATSModel
+from etna.models import CatBoostPerSegmentModel
+from etna.models import DeadlineMovingAverageModel
+from etna.models import HoltWintersModel
+from etna.models import ProphetModel
+from etna.models import SARIMAXModel
+from etna.models import SeasonalMovingAverageModel
+from etna.models import TBATSModel
+from etna.pipeline import Pipeline
+from etna.transforms import IForestOutlierTransform
+from etna.transforms import ModelDecomposeTransform
+from etna.transforms import TimeSeriesImputerTransform
+
+
+def simple_pipeline_with_decompose(in_column, horizon):
+    pipeline = Pipeline(
+        transforms=[ModelDecomposeTransform(model=HoltWintersModel(), in_column=in_column)],
+        model=HoltWintersModel(),
+        horizon=horizon,
+    )
+    return pipeline
+
+
+@pytest.mark.parametrize("in_column", ("target", "feat"))
+def test_init(in_column):
+    transform = ModelDecomposeTransform(model=HoltWintersModel(), in_column=in_column)
+    assert transform.required_features == [in_column]
+    assert transform._first_timestamp is None
+    assert transform._last_timestamp is None
+
+
+def test_unsupported_model():
+    with pytest.raises(ValueError, match=".* is not supported! Supported models are:"):
+        ModelDecomposeTransform(model=CatBoostPerSegmentModel())
+
+
+def test_prepare_ts_invalid_feature(simple_tsdf):
+    transform = ModelDecomposeTransform(model=HoltWintersModel(), in_column="feat")
+    with pytest.raises(KeyError, match="is not found in features"):
+        _ = transform._prepare_ts(ts=simple_tsdf)
+
+
+def test_is_not_fitted(simple_tsdf):
+    transform = ModelDecomposeTransform(model=HoltWintersModel(), in_column="feat")
+    with pytest.raises(ValueError, match="Transform is not fitted!"):
+        transform.transform(ts=simple_tsdf)
+
+
+def test_prepare_ts_in_column_target(ts_with_exogs):
+    ts = ts_with_exogs
+
+    transform = ModelDecomposeTransform(model=HoltWintersModel(), in_column="target")
+    prepared_ts = transform._prepare_ts(ts=ts)
+
+    assert prepared_ts is not ts
+    assert prepared_ts.df_exog is None
+    pd.testing.assert_frame_equal(prepared_ts.df, ts[..., "target"])
+
+
+@pytest.mark.parametrize(
+    "ts_name,in_column",
+    (
+        ("outliers_df_with_two_columns", "feature"),
+        ("ts_with_exogs", "exog"),
+        ("ts_with_exogs", "holiday"),
+    ),
+)
+def test_prepare_ts_in_column_feature(ts_name, in_column, request):
+    ts = request.getfixturevalue(ts_name)
+
+    transform = ModelDecomposeTransform(model=HoltWintersModel(), in_column=in_column)
+    prepared_ts = transform._prepare_ts(ts=ts)
+
+    assert prepared_ts is not ts
+    assert "feature" not in prepared_ts.features
+    assert prepared_ts.df_exog is None
+    pd.testing.assert_frame_equal(
+        prepared_ts.df, ts[..., in_column].rename({in_column: "target"}, axis=1, level="feature")
+    )
+
+
+@pytest.mark.parametrize(
+    "ts_name,in_column",
+    (
+        ("outliers_df_with_two_columns", "target"),
+        ("outliers_df_with_two_columns", "feature"),
+        ("ts_with_exogs", "target"),
+        ("ts_with_exogs", "exog"),
+        ("ts_with_exogs", "holiday"),
+        ("example_tsds_int_timestamp", "target"),
+    ),
+)
+def test_fit(ts_name, in_column, request):
+    ts = request.getfixturevalue(ts_name)
+    transform = ModelDecomposeTransform(model=HoltWintersModel(), in_column=in_column)
+    transform.fit(ts=ts)
+
+    assert transform._first_timestamp == ts.index.min()
+    assert transform._last_timestamp == ts.index.max()
+
+
+@pytest.mark.parametrize("residuals", (True, False))
+@pytest.mark.parametrize("in_column", ("target", "exog"))
+def test_add_residuals(ts_with_exogs, residuals, in_column):
+    ts = ts_with_exogs
+
+    transform = ModelDecomposeTransform(model=HoltWintersModel(), in_column=in_column, residuals=residuals)
+    transformed = transform.fit_transform(ts=ts)
+
+    assert (f"{in_column}_residuals" in transformed.features) is residuals
+
+
+def test_timestamp_from_future(ts_with_exogs_train_test):
+    train, test = ts_with_exogs_train_test
+    transform = ModelDecomposeTransform(model=HoltWintersModel())
+    transform.fit_transform(train)
+
+    with pytest.raises(ValueError, match="Dataset to be transformed must contain historical observations in range"):
+        transform.transform(test)
+
+
+def test_timestamp_from_history(ts_with_exogs_train_test):
+    test, train = ts_with_exogs_train_test
+    transform = ModelDecomposeTransform(model=HoltWintersModel())
+    transform.fit_transform(train)
+
+    with pytest.raises(ValueError, match="First index of the dataset to be transformed must be greater"):
+        transform.transform(test)
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_forecast(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon)
+
+    pipeline.fit(ts=ts)
+    forecast = pipeline.forecast()
+
+    assert forecast.size()[0] == horizon
+    assert np.sum(forecast[..., "target"].isna().sum()) == 0
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_predict(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon)
+
+    pipeline.fit(ts=ts)
+    forecast = pipeline.predict(ts)
+
+    assert forecast.size()[0] == ts.size()[0]
+    assert np.sum(forecast[..., "target"].isna().sum()) == 0
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_predict_components(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon)
+
+    pipeline.fit(ts=ts)
+    forecast = pipeline.predict(ts, return_components=True)
+
+    assert forecast.size()[0] == ts.size()[0]
+    assert forecast.target_components_names == ("target_component_level",)
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_backtest(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon)
+
+    _, forecast, _ = pipeline.backtest(ts=ts, metrics=[MAE()], n_folds=3)
+
+    assert len(forecast) == horizon * 3
+    assert np.sum(forecast.loc[:, pd.IndexSlice[:, "target"]].isna().sum()) == 0
+
+
+@pytest.mark.parametrize(
+    "ts_name,in_column",
+    (
+        ("outliers_df_with_two_columns", "target"),
+        ("outliers_df_with_two_columns", "feature"),
+        ("ts_with_exogs", "target"),
+        ("ts_with_exogs", "exog"),
+    ),
+)
+@pytest.mark.parametrize(
+    "decompose_model",
+    (
+        HoltWintersModel(),
+        ProphetModel(),
+    ),
+)
+@pytest.mark.parametrize("forecast_model", (HoltWintersModel(), ProphetModel(), CatBoostPerSegmentModel(iterations=10)))
+def test_pipeline_models(ts_name, in_column, decompose_model, forecast_model, request):
+    ts = request.getfixturevalue(ts_name)
+
+    pipeline = Pipeline(
+        transforms=[ModelDecomposeTransform(model=decompose_model, in_column=in_column)],
+        model=forecast_model,
+        horizon=3,
+    )
+
+    pipeline.fit(ts)
+    forecast = pipeline.forecast()
+
+    assert forecast.size()[0] == 3
+    assert np.sum(forecast.loc[:, pd.IndexSlice[:, "target"]].isna().sum()) == 0
+
+
+@pytest.mark.parametrize(
+    "decompose_model",
+    (
+        HoltWintersModel(),
+        ProphetModel(),
+        SARIMAXModel(),
+        DeadlineMovingAverageModel(),
+        SeasonalMovingAverageModel(),
+        BATSModel(use_arma_errors=False),
+        TBATSModel(use_arma_errors=False),
+    ),
+)
+def test_decompose_models(ts_with_exogs, decompose_model):
+    pipeline = Pipeline(
+        transforms=[ModelDecomposeTransform(model=decompose_model, in_column="exog")],
+        model=CatBoostPerSegmentModel(iterations=10),
+        horizon=3,
+    )
+
+    pipeline.fit(ts_with_exogs)
+    forecast = pipeline.forecast()
+
+    assert forecast.size()[0] == 3
+    assert np.sum(forecast.loc[:, pd.IndexSlice[:, "target"]].isna().sum()) == 0
+
+
+@pytest.mark.parametrize("answer", ({"1": ["2021-01-11"], "2": ["2021-01-27"]},))
+def test_outlier_detection(outliers_solid_tsds, answer):
+    ts = outliers_solid_tsds
+
+    transforms = [
+        ModelDecomposeTransform(
+            model=HoltWintersModel(seasonal="add", seasonal_periods=3), in_column="target", residuals=True
+        ),
+        IForestOutlierTransform(
+            in_column="target",
+            features_to_use=["target_residuals", "target_seasonality", "target_level"],
+            contamination=0.01,
+        ),
+    ]
+    ts.fit_transform(transforms)
+
+    for segment in ts.segments:
+        empty_values = pd.isna(ts[:, segment, "target"])
+        assert empty_values.sum() == len(answer[segment])
+        assert all(empty_values[answer[segment]])
+
+
+def test_outlier_detection_pipeline(outliers_solid_tsds):
+    ts = outliers_solid_tsds
+    pipeline = Pipeline(
+        transforms=[
+            ModelDecomposeTransform(model=HoltWintersModel(), in_column="target"),
+            IForestOutlierTransform(in_column="target"),
+            TimeSeriesImputerTransform(in_column="target"),
+        ],
+        model=SARIMAXModel(),
+        horizon=3,
+    )
+    pipeline.fit(ts)
+
+
+@pytest.mark.parametrize(
+    "decompose_model, context_size",
+    (
+        (HoltWintersModel(), 0),
+        (ProphetModel(), 0),
+        (SARIMAXModel(), 0),
+        (SeasonalMovingAverageModel(window=3, seasonality=1), 3),
+        (BATSModel(use_arma_errors=False, use_trend=True), 0),
+        (TBATSModel(use_arma_errors=False, use_trend=True), 0),
+    ),
+)
+def test_stride_transform(forward_stride_datasets, decompose_model, context_size):
+    train, test = forward_stride_datasets
+
+    transform = ModelDecomposeTransform(model=decompose_model, residuals=True)
+
+    transform.fit(train)
+    transformed = transform.transform(test)
+
+    assert not transformed.df.iloc[context_size:10].isna().any().any()
+    assert transformed.df.iloc[10:].isna().all().any()
diff --git a/tests/test_transforms/test_inference/test_inverse_transform.py b/tests/test_transforms/test_inference/test_inverse_transform.py
index 63f8aa2bc..c0c828e3b 100644
--- a/tests/test_transforms/test_inference/test_inverse_transform.py
+++ b/tests/test_transforms/test_inference/test_inverse_transform.py
@@ -7,6 +7,7 @@
 from sklearn.tree import DecisionTreeRegressor
 
 from etna.analysis import StatisticsRelevanceTable
+from etna.models import HoltWintersModel
 from etna.models import ProphetModel
 from etna.transforms import AddConstTransform
 from etna.transforms import BinaryOperationTransform
@@ -47,6 +48,7 @@
 from etna.transforms import MinMaxDifferenceTransform
 from etna.transforms import MinMaxScalerTransform
 from etna.transforms import MinTransform
+from etna.transforms import ModelDecomposeTransform
 from etna.transforms import MRMRFeatureSelectionTransform
 from etna.transforms import OneHotEncoderTransform
 from etna.transforms import PredictionIntervalOutliersTransform
@@ -144,6 +146,7 @@ def _test_inverse_transform_train(self, ts, transform, expected_changes):
                 {},
             ),
             (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}),
+            (ModelDecomposeTransform(model=ProphetModel(), in_column="target", residuals=True), "regular_ts", {}),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -607,6 +610,7 @@ def test_inverse_transform_train_fail_resample(self, transform, dataset_name, ex
                 {},
             ),
             (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts", {}),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -1100,6 +1104,7 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments)
                 "regular_ts",
             ),
             (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts"),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts"),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -1388,6 +1393,8 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
             ),
             (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts"),
             (FourierDecomposeTransform(in_column="positive", k=5, residuals=True), "ts_with_exog"),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts"),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="positive", residuals=True), "ts_with_exog"),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -1980,6 +1987,7 @@ def test_inverse_transform_train_new_segments(self, transform, dataset_name, exp
                 ),
                 "regular_ts",
             ),
+            (ModelDecomposeTransform(model=ProphetModel(), in_column="target", residuals=True), "regular_ts"),
             # encoders
             (MeanEncoderTransform(in_column="weekday", out_column="mean_encoder"), "ts_with_exog"),
             (MeanSegmentEncoderTransform(), "regular_ts"),
@@ -2421,6 +2429,7 @@ def test_inverse_transform_future_new_segments(self, transform, dataset_name, ex
                 ),
                 "regular_ts",
             ),
+            (ModelDecomposeTransform(model=ProphetModel(), in_column="target", residuals=True), "regular_ts"),
             # encoders
             (MeanEncoderTransform(in_column="weekday", out_column="mean_encoder"), "ts_with_exog"),
             (MeanSegmentEncoderTransform(), "regular_ts"),
@@ -3006,6 +3015,7 @@ def test_inverse_transform_future_with_target_fail_difference(
         "transform, dataset_name, expected_changes",
         [
             (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts", {}),
         ],
     )
     def test_inverse_transform_future_with_target_fail_require_history(
@@ -3127,6 +3137,8 @@ def _test_inverse_transform_future_without_target(
             ),
             (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}),
             (FourierDecomposeTransform(in_column="positive", k=5, residuals=True), "ts_with_exog", {}),
+            (ModelDecomposeTransform(model=ProphetModel(), in_column="target", residuals=True), "regular_ts", {}),
+            (ModelDecomposeTransform(model=ProphetModel(), in_column="positive", residuals=True), "ts_with_exog", {}),
             # embeddings
             (
                 EmbeddingSegmentTransform(
diff --git a/tests/test_transforms/test_inference/test_transform.py b/tests/test_transforms/test_inference/test_transform.py
index 9d6cc7866..7fff2d780 100644
--- a/tests/test_transforms/test_inference/test_transform.py
+++ b/tests/test_transforms/test_inference/test_transform.py
@@ -7,6 +7,7 @@
 from sklearn.tree import DecisionTreeRegressor
 
 from etna.analysis import StatisticsRelevanceTable
+from etna.models import HoltWintersModel
 from etna.models import ProphetModel
 from etna.transforms import AddConstTransform
 from etna.transforms import BinaryOperationTransform
@@ -47,6 +48,7 @@
 from etna.transforms import MinMaxDifferenceTransform
 from etna.transforms import MinMaxScalerTransform
 from etna.transforms import MinTransform
+from etna.transforms import ModelDecomposeTransform
 from etna.transforms import MRMRFeatureSelectionTransform
 from etna.transforms import OneHotEncoderTransform
 from etna.transforms import PredictionIntervalOutliersTransform
@@ -134,6 +136,11 @@ def _test_transform_train(self, ts, transform, expected_changes):
                 "regular_ts",
                 {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}},
             ),
+            (
+                ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True),
+                "regular_ts",
+                {"create": {"target_level", "target_residuals"}},
+            ),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -561,6 +568,11 @@ def test_transform_train_datetime_timestamp(self, transform, dataset_name, expec
                 "regular_ts",
                 {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}},
             ),
+            (
+                ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True),
+                "regular_ts",
+                {"create": {"target_level", "target_residuals"}},
+            ),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -1072,6 +1084,7 @@ def _test_transform_train_subset_segments(self, ts, transform, segments):
                 "regular_ts",
             ),
             (FourierDecomposeTransform(in_column="target", k=2, residuals=True), "regular_ts"),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts"),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -1342,6 +1355,8 @@ def _test_transform_future_subset_segments(self, ts, transform, segments, horizo
             ),
             (FourierDecomposeTransform(in_column="target", k=2, residuals=True), "regular_ts"),
             (FourierDecomposeTransform(in_column="positive", k=2, residuals=True), "ts_with_exog"),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts"),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="positive", residuals=True), "ts_with_exog"),
             # embeddings
             (
                 EmbeddingSegmentTransform(
@@ -1903,6 +1918,7 @@ def test_transform_train_new_segments(self, transform, dataset_name, expected_ch
                 ),
                 "regular_ts",
             ),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts"),
             # encoders
             (MeanEncoderTransform(in_column="weekday", out_column="mean_encoder"), "ts_with_exog"),
             (MeanSegmentEncoderTransform(), "regular_ts"),
@@ -2338,6 +2354,7 @@ def test_transform_future_new_segments(self, transform, dataset_name, expected_c
                 ),
                 "regular_ts",
             ),
+            (ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True), "regular_ts"),
             # encoders
             (MeanEncoderTransform(in_column="weekday", out_column="mean_encoder"), "ts_with_exog"),
             (MeanSegmentEncoderTransform(), "regular_ts"),
@@ -2838,6 +2855,11 @@ def test_transform_future_with_target(self, transform, dataset_name, expected_ch
                 "regular_ts",
                 {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}},
             ),
+            (
+                ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True),
+                "regular_ts",
+                {"create": {"target_level", "target_residuals"}},
+            ),
         ),
     )
     def test_transform_future_with_target_fail_require_history(
@@ -2930,6 +2952,11 @@ def _test_transform_future_without_target(self, ts, transform, expected_changes,
                 "ts_with_exog",
                 {"create": {"positive_dft_0", "positive_dft_1", "positive_dft_residuals"}},
             ),
+            (
+                ModelDecomposeTransform(model=HoltWintersModel(), in_column="target", residuals=True),
+                "regular_ts",
+                {"create": {"target_level", "target_residuals"}},
+            ),
             # embeddings
             (
                 EmbeddingSegmentTransform(