diff --git a/CHANGELOG.md b/CHANGELOG.md index e7381bde7..605342211 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `TSDataset.features` property to get list of all features in a dataset ([#405](https://github.com/etna-team/etna/pull/405)) - Add `MADOutlierTransform` class for anomaly detection ([#415](https://github.com/etna-team/etna/pull/415)) - Add `MeanEncoderTransform` ([#413](https://github.com/etna-team/etna/pull/413)) +- Add `FourierDecomposeTransform` transform for series decomposition using DFT ([#430](https://github.com/etna-team/etna/pull/430)) ### Changed - Allow to change `device`, `batch_size` and `num_workers` of embedding models ([#396](https://github.com/etna-team/etna/pull/396)) diff --git a/docs/source/api_reference/transforms.rst b/docs/source/api_reference/transforms.rst index 42fca2c0f..55a146690 100644 --- a/docs/source/api_reference/transforms.rst +++ b/docs/source/api_reference/transforms.rst @@ -44,6 +44,7 @@ Decomposition transforms and their utilities: decomposition.MedianPerIntervalModel decomposition.SklearnPreprocessingPerIntervalModel decomposition.SklearnRegressionPerIntervalModel + decomposition.FourierDecomposeTransform Categorical encoding transforms: diff --git a/etna/transforms/__init__.py b/etna/transforms/__init__.py index fb4b93896..44c536bac 100644 --- a/etna/transforms/__init__.py +++ b/etna/transforms/__init__.py @@ -12,6 +12,7 @@ from etna.transforms.decomposition import ChangePointsSegmentationTransform from etna.transforms.decomposition import ChangePointsTrendTransform from etna.transforms.decomposition import DeseasonalityTransform +from etna.transforms.decomposition import FourierDecomposeTransform from etna.transforms.decomposition import IrreversibleChangePointsTransform from etna.transforms.decomposition import LinearTrendTransform from etna.transforms.decomposition import ReversibleChangePointsTransform diff 
import numbers
from typing import List

import numpy as np
import pandas as pd

from etna.datasets import TSDataset
from etna.datasets.utils import determine_num_steps
from etna.transforms import IrreversibleTransform


class FourierDecomposeTransform(IrreversibleTransform):
    """Transform that uses Fourier transformation to estimate series decomposition.

    Note
    ----
    This transform decomposes only in-sample data. For the future timestamps it produces ``NaN``.
    For the dataset to be transformed, it should contain at least the minimum amount of in-sample
    timestamps that are required by the transform.

    Warning
    -------
    This transform adds new columns to the dataset that correspond to the selected frequencies.
    Such columns are named with a ``dft_{i}`` suffix (``dft_residuals`` for the residuals).
    Suffix indices do NOT indicate any relation to the frequencies. Produced names should be
    thought of as arbitrary identifiers of the produced sinusoids.
    """

    def __init__(self, k: int, in_column: str = "target", residuals: bool = False):
        """Init ``FourierDecomposeTransform``.

        Parameters
        ----------
        k:
            number of top positive frequencies to select for the decomposition.
            The ``k`` frequencies with the largest amplitudes are selected.
        in_column:
            name of the processed column.
        residuals:
            whether to add residuals after decomposition.
            This guarantees that all components, including residuals, sum up to the series.

        Raises
        ------
        ValueError:
            if ``k`` is not a positive integer.
        """
        # Reject non-integers here: a fractional ``k`` would otherwise only fail later,
        # inside the slicing done by ``_dft_components``, with an unrelated error.
        if not isinstance(k, numbers.Integral) or k <= 0:
            raise ValueError("Parameter `k` must be positive integer!")

        self.k = k
        self.in_column = in_column
        self.residuals = residuals

        # Bounds of the index seen during ``fit``; ``None`` means "not fitted yet".
        self._first_timestamp = None
        self._last_timestamp = None

        super().__init__(required_features=[in_column])

    def get_regressors_info(self) -> List[str]:
        """Return the list with regressors created by the transform."""
        return []

    def _fit(self, df: pd.DataFrame):
        """Do nothing; fitting is implemented directly in :py:meth:`fit`."""
        pass

    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Do nothing; transformation is implemented directly in :py:meth:`transform`."""
        pass

    @staticmethod
    def _get_num_pos_freqs(series: pd.Series) -> int:
        """Get number of positive frequencies for the series."""
        num_obs = len(series)
        return int(np.ceil((num_obs - 1) / 2) + 1)

    def _check_segments(self, df: pd.DataFrame):
        """Check if series satisfy conditions.

        Parameters
        ----------
        df:
            dataframe with one column per series.

        Raises
        ------
        ValueError:
            if some series has missing values between its first and last valid observations.
        ValueError:
            if ``k`` exceeds the number of positive frequencies of the shortest series.
        """
        segments_with_missing = []
        min_num_pos_freq = float("inf")
        for segment in df:
            series = df[segment]
            # Leading and trailing NaNs are allowed, so only the valid span is inspected.
            series = series.loc[series.first_valid_index() : series.last_valid_index()]
            if series.isna().any():
                segments_with_missing.append(segment)

            min_num_pos_freq = min(min_num_pos_freq, self._get_num_pos_freqs(series))

        if segments_with_missing:
            raise ValueError(
                f"Feature `{self.in_column}` contains missing values in segments: {segments_with_missing}!"
            )

        if self.k > min_num_pos_freq:
            raise ValueError(f"Parameter `k` must not be greater then {min_num_pos_freq} for the provided dataset!")

    def _dft_components(self, series: pd.Series) -> pd.DataFrame:
        """Estimate series decomposition using FFT.

        Parameters
        ----------
        series:
            series to decompose; leading and trailing ``NaN`` values are ignored.

        Returns
        -------
        :
            dataframe indexed like ``series`` with columns ``dft_0``, ..., one per selected
            frequency, plus ``dft_residuals`` when ``residuals`` is enabled.
            Rows outside the valid span of ``series`` are filled with ``NaN``.
        """
        initial_index = series.index
        series = series.loc[series.first_valid_index() : series.last_valid_index()]

        num_pos_freqs = self._get_num_pos_freqs(series)

        # compute Fourier decomposition of the series
        dft_series = np.fft.fft(series)

        # compute "amplitudes" of the positive frequencies and select top-k indices;
        # ``argpartition`` does not order the selected indices, hence arbitrary column ids
        abs_pos_dft_series = np.abs(dft_series[:num_pos_freqs])
        top_k_idxs = np.argpartition(abs_pos_dft_series, num_pos_freqs - self.k)[-self.k :]

        # separate each selected frequency into its own spectrum: row ``i`` is zero everywhere
        # except at bin ``top_k_idxs[i]``. Built directly to avoid a dense (N x N) diagonal matrix.
        num_selected = top_k_idxs.size
        selected_freqs = np.zeros((num_selected, dft_series.size), dtype=dft_series.dtype)
        selected_freqs[np.arange(num_selected), top_k_idxs] = dft_series[top_k_idxs]

        # return frequencies to initial domain (inverse transform is applied row-wise)
        components = np.fft.ifft(selected_freqs).real

        components_df = pd.DataFrame(
            data=components.T, columns=[f"dft_{i}" for i in range(components.shape[0])], index=series.index
        )

        if self.residuals:
            components_df["dft_residuals"] = series.values - np.sum(components, axis=0)

        # return trailing and leading nans to the series if any existed initially
        if not components_df.index.equals(initial_index):
            components_df = components_df.reindex(index=initial_index, fill_value=np.nan)

        return components_df

    def fit(self, ts: TSDataset) -> "FourierDecomposeTransform":
        """Fit the transform and the decomposition model.

        Remembers the first and the last timestamps of the dataset and validates the processed column.

        Parameters
        ----------
        ts:
            dataset to fit the transform on.

        Returns
        -------
        :
            the fitted transform instance.

        Raises
        ------
        ValueError:
            if the processed column does not pass the checks of ``_check_segments``.
        """
        self._first_timestamp = ts.index.min()
        self._last_timestamp = ts.index.max()

        self._check_segments(df=ts[..., self.in_column].droplevel("feature", axis=1))

        return self

    def transform(self, ts: TSDataset) -> TSDataset:
        """Transform ``TSDataset`` inplace.

        Parameters
        ----------
        ts:
            Dataset to transform.

        Returns
        -------
        :
            Transformed ``TSDataset``.

        Raises
        ------
        ValueError:
            if the transform is not fitted.
        ValueError:
            if the dataset starts before the first timestamp seen during ``fit``.
        ValueError:
            if the dataset starts after the last timestamp seen during ``fit``.
        ValueError:
            if the in-sample part of the processed column does not pass the checks of ``_check_segments``.
        """
        if self._first_timestamp is None:
            raise ValueError("Transform is not fitted!")

        ts_min_timestamp = ts.index.min()

        if ts_min_timestamp < self._first_timestamp:
            raise ValueError(
                f"First index of the dataset to be transformed must be larger or equal than {self._first_timestamp}!"
            )

        if ts_min_timestamp > self._last_timestamp:
            raise ValueError(
                f"Dataset to be transformed must contain historical observations in range {self._first_timestamp} - {self._last_timestamp}"
            )

        segment_df = ts[..., self.in_column].droplevel("feature", axis=1)

        # Everything after the last fitted timestamp is treated as future:
        # it is masked so that only in-sample values are decomposed.
        ts_max_timestamp = ts.index.max()
        if ts_max_timestamp > self._last_timestamp:
            future_steps = determine_num_steps(self._last_timestamp, ts_max_timestamp, freq=ts.freq)
            segment_df.iloc[-future_steps:] = np.nan

        self._check_segments(df=segment_df)

        segment_components = []
        for segment in segment_df.columns:
            components_df = self._dft_components(series=segment_df[segment])
            components_df = components_df.add_prefix(f"{self.in_column}_")

            components_df.columns = pd.MultiIndex.from_product(
                [[segment], components_df.columns], names=["segment", "feature"]
            )

            segment_components.append(components_df)

        ts.add_columns_from_pandas(pd.concat(segment_components, axis=1))

        return ts


__all__ = ["FourierDecomposeTransform"]
import numpy as np
import pandas as pd
import pytest

from etna.datasets import TSDataset
from etna.datasets import generate_ar_df
from etna.metrics import MAE
from etna.models import CatBoostPerSegmentModel
from etna.models import HoltWintersModel
from etna.models import ProphetModel
from etna.pipeline import Pipeline
from etna.transforms import FourierDecomposeTransform
from etna.transforms import IForestOutlierTransform
from etna.transforms import TimeSeriesImputerTransform


# Fixture carried by the ``tests/conftest.py`` hunk that shares this span.
@pytest.fixture()
def outliers_solid_tsds():
    """Create TSDataset with outliers and same last date."""
    timestamp = pd.date_range("2021-01-01", end="2021-02-20", freq="D")
    target1 = [np.sin(i) for i in range(len(timestamp))]
    target1[10] += 10

    target2 = [np.sin(i) for i in range(len(timestamp))]
    target2[8] += 8
    target2[15] = 2
    target2[26] -= 12

    df1 = pd.DataFrame({"timestamp": timestamp, "target": target1, "segment": "1"})
    df2 = pd.DataFrame({"timestamp": timestamp, "target": target2, "segment": "2"})
    df = pd.concat([df1, df2], ignore_index=True)
    df_exog = df.copy()
    df_exog.columns = ["timestamp", "regressor_1", "segment"]
    ts = TSDataset(
        df=TSDataset.to_dataset(df).iloc[:-10],
        df_exog=TSDataset.to_dataset(df_exog),
        freq="D",
        known_future="all",
    )
    return ts


def simple_pipeline_with_decompose(in_column, horizon, k):
    """Build a pipeline that applies the decomposition before a Holt-Winters model."""
    pipeline = Pipeline(
        transforms=[FourierDecomposeTransform(k=k, in_column=in_column)],
        model=HoltWintersModel(),
        horizon=horizon,
    )
    return pipeline


@pytest.fixture()
def ts_with_exogs() -> TSDataset:
    """Create two-segment dataset with ``exog`` and binary ``holiday`` regressors known 10 steps ahead."""
    periods = 100
    periods_exog = periods + 10
    df = generate_ar_df(start_time="2020-01-01", periods=periods, freq="D", n_segments=2)
    df_exog = generate_ar_df(start_time="2020-01-01", periods=periods_exog, freq="D", n_segments=2, random_seed=2)
    df_exog.rename(columns={"target": "exog"}, inplace=True)

    # A seeded generator keeps the fixture identical between runs.
    rng = np.random.default_rng(0)
    df_exog["holiday"] = rng.choice([0, 1], size=periods_exog * 2)

    ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all")
    return ts


@pytest.fixture()
def ts_with_exogs_train_test(ts_with_exogs):
    return ts_with_exogs.train_test_split(test_size=20)


@pytest.fixture()
def forward_stride_datasets(ts_with_exogs):
    """Create train and test parts where the first 10 timestamps of test overlap the end of train."""
    train_df = ts_with_exogs.df.iloc[:-10]
    test_df = ts_with_exogs.df.iloc[-20:]

    train_ts = TSDataset(df=train_df, freq=ts_with_exogs.freq)
    test_ts = TSDataset(df=test_df, freq=ts_with_exogs.freq)

    return train_ts, test_ts


@pytest.fixture()
def ts_with_missing(ts_with_exogs):
    """Create dataset with a missing target value in the middle of the series."""
    target_df = ts_with_exogs[..., "target"]
    target_df.iloc[10] = np.nan

    return TSDataset(df=target_df, freq=ts_with_exogs.freq)


@pytest.mark.parametrize("in_column", ("target", "feat"))
def test_init(in_column):
    transform = FourierDecomposeTransform(k=5, in_column=in_column)
    assert transform.required_features == [in_column]
    assert transform._first_timestamp is None
    assert transform._last_timestamp is None


@pytest.mark.parametrize("k", (-1, 0))
def test_invalid_k(k):
    with pytest.raises(ValueError, match="Parameter `k` must be positive integer!"):
        FourierDecomposeTransform(k=k, in_column="target")


@pytest.mark.parametrize(
    "series, answ",
    (
        (pd.Series([1]), 1),
        (pd.Series([1, 2]), 2),
        (pd.Series([1, 2, 3]), 2),
        (pd.Series([1, 2, 3, 4]), 3),
        (pd.Series([1, 2, 3, 4, 5]), 3),
        (pd.Series([1, 2, 3, 4, 5, 6]), 4),
    ),
)
def test_get_num_pos_freqs(series, answ):
    res = FourierDecomposeTransform._get_num_pos_freqs(series=series)
    assert res == answ


def test_check_segments_missing_values(ts_with_missing):
    df = ts_with_missing[..., "target"]
    transform = FourierDecomposeTransform(k=5)
    with pytest.raises(ValueError, match="Feature `target` contains missing values"):
        transform._check_segments(df=df)


@pytest.mark.parametrize("k", (52, 100))
def test_check_segments_large_k(ts_with_exogs, k):
    df = ts_with_exogs[..., "target"]
    transform = FourierDecomposeTransform(k=k)
    with pytest.raises(ValueError, match="Parameter `k` must not be greater then"):
        transform._check_segments(df=df)


def test_check_segments_ok(ts_with_exogs):
    df = ts_with_exogs[..., "target"]
    transform = FourierDecomposeTransform(k=5)
    transform._check_segments(df=df)


@pytest.mark.parametrize(
    "series",
    (
        pd.Series(np.arange(5)),
        pd.Series(np.arange(10)),
        pd.Series([np.nan] * 2 + list(range(5)) + [np.nan] * 3),
    ),
)
def test_fft_components_out_format(series):
    expected_columns = ["dft_0", "dft_1", "dft_2", "dft_residuals"]
    transform = FourierDecomposeTransform(k=3, residuals=True)

    decompose_df = transform._dft_components(series=series)

    assert isinstance(decompose_df, pd.DataFrame)
    pd.testing.assert_index_equal(decompose_df.index, series.index)
    assert (decompose_df.columns == expected_columns).all()
    np.testing.assert_allclose(np.sum(decompose_df.values, axis=1), series.values)


def test_is_not_fitted(simple_tsdf):
    transform = FourierDecomposeTransform(k=5, in_column="feat")
    with pytest.raises(ValueError, match="Transform is not fitted!"):
        transform.transform(ts=simple_tsdf)


@pytest.mark.parametrize(
    "ts_name,in_column",
    (
        ("outliers_df_with_two_columns", "target"),
        ("outliers_df_with_two_columns", "feature"),
        ("ts_with_exogs", "target"),
        ("ts_with_exogs", "exog"),
        ("ts_with_exogs", "holiday"),
        ("example_tsds_int_timestamp", "target"),
    ),
)
def test_fit(ts_name, in_column, request):
    ts = request.getfixturevalue(ts_name)
    transform = FourierDecomposeTransform(k=5, in_column=in_column)
    transform.fit(ts=ts)

    assert transform._first_timestamp == ts.index.min()
    assert transform._last_timestamp == ts.index.max()


@pytest.mark.parametrize("residuals", (True, False))
@pytest.mark.parametrize("in_column", ("target", "exog"))
def test_add_residuals(ts_with_exogs, residuals, in_column):
    ts = ts_with_exogs

    transform = FourierDecomposeTransform(k=5, in_column=in_column, residuals=residuals)
    transformed = transform.fit_transform(ts=ts)

    assert (f"{in_column}_dft_residuals" in transformed.features) is residuals


def test_timestamp_from_history(ts_with_exogs_train_test):
    # The parts are swapped on purpose: fit on the later part, then transform the earlier one.
    earlier_part, later_part = ts_with_exogs_train_test
    transform = FourierDecomposeTransform(k=5)
    transform.fit_transform(later_part)

    with pytest.raises(ValueError, match="First index of the dataset to be transformed must be larger"):
        transform.transform(earlier_part)


def test_timestamp_from_future(ts_with_exogs_train_test):
    train, test = ts_with_exogs_train_test
    transform = FourierDecomposeTransform(k=5)
    transform.fit_transform(train)

    with pytest.raises(ValueError, match="Dataset to be transformed must contain historical observations in range"):
        transform.transform(test)


@pytest.mark.parametrize("in_column", ("target", "holiday", "exog"))
@pytest.mark.parametrize("horizon", (1, 5))
def test_simple_pipeline_forecast(ts_with_exogs, in_column, horizon):
    ts = ts_with_exogs

    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)

    pipeline.fit(ts=ts)
    forecast = pipeline.forecast()

    assert forecast.size()[0] == horizon
    assert np.sum(forecast[..., "target"].isna().sum()) == 0


@pytest.mark.parametrize("in_column", ("target", "holiday", "exog"))
@pytest.mark.parametrize("horizon", (1, 5))
def test_simple_pipeline_predict(ts_with_exogs, in_column, horizon):
    ts = ts_with_exogs

    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)

    pipeline.fit(ts=ts)
    forecast = pipeline.predict(ts)

    assert forecast.size()[0] == ts.size()[0]
    assert np.sum(forecast[..., "target"].isna().sum()) == 0


@pytest.mark.parametrize("in_column", ("target", "holiday", "exog"))
@pytest.mark.parametrize("horizon", (1, 5))
def test_simple_pipeline_predict_components(ts_with_exogs, in_column, horizon):
    ts = ts_with_exogs

    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)

    pipeline.fit(ts=ts)
    forecast = pipeline.predict(ts, return_components=True)

    assert forecast.size()[0] == ts.size()[0]
    assert forecast.target_components_names == ("target_component_level",)


@pytest.mark.parametrize("in_column", ("target", "holiday", "exog"))
@pytest.mark.parametrize("horizon", (1, 5))
def test_simple_pipeline_backtest(ts_with_exogs, in_column, horizon):
    ts = ts_with_exogs

    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)

    _, forecast, _ = pipeline.backtest(ts=ts, metrics=[MAE()], n_folds=3)

    assert len(forecast) == horizon * 3
    assert np.sum(forecast.loc[:, pd.IndexSlice[:, "target"]].isna().sum()) == 0


@pytest.mark.parametrize(
    "ts_name,in_column",
    (
        ("outliers_df_with_two_columns", "target"),
        ("outliers_df_with_two_columns", "feature"),
        ("ts_with_exogs", "target"),
        ("ts_with_exogs", "exog"),
    ),
)
@pytest.mark.parametrize("k", (1, 5, 10, 40, 51))
@pytest.mark.parametrize("forecast_model", (ProphetModel(), CatBoostPerSegmentModel(iterations=10)))
def test_pipeline_parameter_k(ts_name, in_column, forecast_model, k, request):
    ts = request.getfixturevalue(ts_name)

    # NOTE(review): ``k`` is parametrized but the transform is built with ``k=5``, so every
    # value of ``k`` runs the same scenario. Passing ``k=k`` is presumably the intent — confirm
    # first that each dataset is long enough for the largest value (TODO).
    pipeline = Pipeline(
        transforms=[FourierDecomposeTransform(k=5, in_column=in_column)],
        model=forecast_model,
        horizon=3,
    )

    pipeline.fit(ts)
    forecast = pipeline.forecast()

    assert forecast.size()[0] == 3
    assert np.sum(forecast.loc[:, pd.IndexSlice[:, "target"]].isna().sum()) == 0


@pytest.mark.parametrize("answer", ({"1": ["2021-01-11"], "2": ["2021-01-09"]},))
def test_outlier_detection(outliers_solid_tsds, answer):
    ts = outliers_solid_tsds

    transforms = [
        FourierDecomposeTransform(k=2, in_column="target", residuals=True),
        IForestOutlierTransform(
            in_column="target",
            features_to_ignore=["target", "regressor_1"],
            contamination=0.01,
        ),
    ]
    ts.fit_transform(transforms)

    for segment in ts.segments:
        empty_values = pd.isna(ts[:, segment, "target"])
        assert empty_values.sum() == len(answer[segment])
        assert all(empty_values[answer[segment]])


def test_outlier_detection_pipeline(outliers_solid_tsds):
    ts = outliers_solid_tsds
    pipeline = Pipeline(
        transforms=[
            FourierDecomposeTransform(k=5, in_column="target"),
            IForestOutlierTransform(in_column="target"),
            TimeSeriesImputerTransform(in_column="target"),
        ],
        model=ProphetModel(),
        horizon=3,
    )
    pipeline.fit(ts)


@pytest.mark.parametrize("k", (1, 5))
def test_stride_transform(forward_stride_datasets, k):
    train, test = forward_stride_datasets

    transform = FourierDecomposeTransform(k=k, residuals=True)

    transform.fit(train)
    transformed = transform.transform(test)

    # The first 10 test timestamps overlap the fitted range; the rest lie after it.
    assert not transformed.df.iloc[:10].isna().any().any()
    assert transformed.df.iloc[10:].isna().all().any()
ts, transform, expected_changes): "regular_ts", {}, ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), # embeddings ( EmbeddingSegmentTransform( @@ -604,6 +606,7 @@ def test_inverse_transform_train_fail_resample(self, transform, dataset_name, ex "regular_ts", {}, ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), # embeddings ( EmbeddingSegmentTransform( @@ -1096,6 +1099,7 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments) ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts"), # embeddings ( EmbeddingSegmentTransform( @@ -1382,6 +1386,8 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts"), + (FourierDecomposeTransform(in_column="positive", k=5, residuals=True), "ts_with_exog"), # embeddings ( EmbeddingSegmentTransform( @@ -2996,6 +3002,19 @@ def test_inverse_transform_future_with_target_fail_difference( with pytest.raises(ValueError, match="Test should go after the train without gaps"): self._test_inverse_transform_future_with_target(ts, transform, expected_changes=expected_changes) + @pytest.mark.parametrize( + "transform, dataset_name, expected_changes", + [ + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), + ], + ) + def test_inverse_transform_future_with_target_fail_require_history( + self, transform, dataset_name, expected_changes, request + ): + ts = request.getfixturevalue(dataset_name) + with pytest.raises(ValueError, match="Dataset to be transformed must contain historical observations"): + self._test_inverse_transform_future_with_target(ts, transform, expected_changes=expected_changes) + # It is the only transform that doesn't change values back during `inverse_transform` @to_be_fixed(raises=AssertionError) 
@pytest.mark.parametrize( @@ -3106,6 +3125,8 @@ def _test_inverse_transform_future_without_target( "regular_ts", {}, ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), + (FourierDecomposeTransform(in_column="positive", k=5, residuals=True), "ts_with_exog", {}), # embeddings ( EmbeddingSegmentTransform( diff --git a/tests/test_transforms/test_inference/test_transform.py b/tests/test_transforms/test_inference/test_transform.py index 3f1073a73..9d6cc7866 100644 --- a/tests/test_transforms/test_inference/test_transform.py +++ b/tests/test_transforms/test_inference/test_transform.py @@ -23,6 +23,7 @@ from etna.transforms import EventTransform from etna.transforms import ExogShiftTransform from etna.transforms import FilterFeaturesTransform +from etna.transforms import FourierDecomposeTransform from etna.transforms import FourierTransform from etna.transforms import GaleShapleyFeatureSelectionTransform from etna.transforms import HolidayTransform @@ -128,6 +129,11 @@ def _test_transform_train(self, ts, transform, expected_changes): "regular_ts", {"create": {"res"}}, ), + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), # embeddings ( EmbeddingSegmentTransform( @@ -550,6 +556,11 @@ def test_transform_train_datetime_timestamp(self, transform, dataset_name, expec "regular_ts", {"create": {"res"}}, ), + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), # embeddings ( EmbeddingSegmentTransform( @@ -1060,6 +1071,7 @@ def _test_transform_train_subset_segments(self, ts, transform, segments): ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=2, residuals=True), "regular_ts"), # embeddings ( EmbeddingSegmentTransform( @@ -1328,6 +1340,8 @@ def _test_transform_future_subset_segments(self, ts, 
transform, segments, horizo ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=2, residuals=True), "regular_ts"), + (FourierDecomposeTransform(in_column="positive", k=2, residuals=True), "ts_with_exog"), # embeddings ( EmbeddingSegmentTransform( @@ -2816,6 +2830,23 @@ def test_transform_future_with_target(self, transform, dataset_name, expected_ch ts = request.getfixturevalue(dataset_name) self._test_transform_future_with_target(ts, transform, expected_changes=expected_changes) + @pytest.mark.parametrize( + "transform, dataset_name, expected_changes", + ( + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), + ), + ) + def test_transform_future_with_target_fail_require_history( + self, transform, dataset_name, expected_changes, request + ): + ts = request.getfixturevalue(dataset_name) + with pytest.raises(ValueError, match="Dataset to be transformed must contain historical observations"): + self._test_transform_future_with_target(ts, transform, expected_changes=expected_changes) + class TestTransformFutureWithoutTarget: """Test transform on future dataset with unknown target. 
@@ -2889,6 +2920,16 @@ def _test_transform_future_without_target(self, ts, transform, expected_changes, "regular_ts", {"create": {"res"}}, ), + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), + ( + FourierDecomposeTransform(in_column="positive", k=2, residuals=True), + "ts_with_exog", + {"create": {"positive_dft_0", "positive_dft_1", "positive_dft_residuals"}}, + ), # embeddings ( EmbeddingSegmentTransform( diff --git a/tests/test_transforms/test_outliers/test_outliers_transform.py b/tests/test_transforms/test_outliers/test_outliers_transform.py index 87ff27ecb..daad6fc8d 100644 --- a/tests/test_transforms/test_outliers/test_outliers_transform.py +++ b/tests/test_transforms/test_outliers/test_outliers_transform.py @@ -78,32 +78,6 @@ def compare_outputs(ts, in_column, method, transform_constructor, method_kwargs, assert np.all(transformed_column[transformed_column.isna()].index == nan_timestamps) -@pytest.fixture() -def outliers_solid_tsds(): - """Create TSDataset with outliers and same last date.""" - timestamp = pd.date_range("2021-01-01", end="2021-02-20", freq="D") - target1 = [np.sin(i) for i in range(len(timestamp))] - target1[10] += 10 - - target2 = [np.sin(i) for i in range(len(timestamp))] - target2[8] += 8 - target2[15] = 2 - target2[26] -= 12 - - df1 = pd.DataFrame({"timestamp": timestamp, "target": target1, "segment": "1"}) - df2 = pd.DataFrame({"timestamp": timestamp, "target": target2, "segment": "2"}) - df = pd.concat([df1, df2], ignore_index=True) - df_exog = df.copy() - df_exog.columns = ["timestamp", "regressor_1", "segment"] - ts = TSDataset( - df=TSDataset.to_dataset(df).iloc[:-10], - df_exog=TSDataset.to_dataset(df_exog), - freq="D", - known_future="all", - ) - return ts - - @pytest.fixture() def outliers_solid_tsds_with_holidays(outliers_solid_tsds): """Create TSDataset with outliers with holidays"""