diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8af6dcf67..dadb8fa0b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add `TSDataset.features` property to get list of all features in a dataset ([#405](https://github.com/etna-team/etna/pull/405))
 - Add `MADOutlierTransform` class for anomaly detection ([#415](https://github.com/etna-team/etna/pull/415))
 - Add `MeanEncoderTransform` ([#413](https://github.com/etna-team/etna/pull/413))
+- Add `FourierDecomposeTransform` transform for series decomposition using DFT ([#430](https://github.com/etna-team/etna/pull/430))
 
 ### Changed
 - Allow to change `device`, `batch_size` and `num_workers` of embedding models ([#396](https://github.com/etna-team/etna/pull/396))
@@ -35,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fix typo in 103 tutorial ([#408](https://github.com/etna-team/etna/pull/408))
 - Remove sorting of `ts.df` by timestamps in `plot_forecast` and `plot_forecast_decomposition` ([#410](https://github.com/etna-team/etna/pull/410))
 - Fix forecast visualization with `horizon=1` ([#426](https://github.com/etna-team/etna/pull/426))
+- Set upper bound `<2` on numpy version ([#431](https://github.com/etna-team/etna/pull/431))
+- Fix `VotingEnsemble`, `StackingEnsemble`, `DirectEnsemble` have a valid `params_to_tune` that returns empty dict ([#432](https://github.com/etna-team/etna/pull/432))
 - Fix passing custom model to `STLTransform` ([#412](https://github.com/etna-team/etna/pull/412))

diff --git a/docs/source/api_reference/transforms.rst b/docs/source/api_reference/transforms.rst
index 42fca2c0f..55a146690 100644
--- a/docs/source/api_reference/transforms.rst
+++ b/docs/source/api_reference/transforms.rst
@@ -44,6 +44,7 @@ Decomposition transforms and their utilities:
     decomposition.MedianPerIntervalModel
     decomposition.SklearnPreprocessingPerIntervalModel
     decomposition.SklearnRegressionPerIntervalModel
+    decomposition.FourierDecomposeTransform
 
 Categorical encoding transforms:

diff --git a/etna/experimental/change_points/regularization_search.py b/etna/experimental/change_points/regularization_search.py
index 62dbd5c3f..c3faa5093 100644
--- a/etna/experimental/change_points/regularization_search.py
+++ b/etna/experimental/change_points/regularization_search.py
@@ -71,9 +71,9 @@ def _get_next_value(
         next value and its bounds
     """
     if need_greater:
-        return np.mean([now_value, lower_bound]), lower_bound, now_value
+        return float(np.mean([now_value, lower_bound])), lower_bound, now_value
     else:
-        return np.mean([now_value, upper_bound]), now_value, upper_bound
+        return float(np.mean([now_value, upper_bound])), now_value, upper_bound
 
 
 def bin_search(
@@ -121,7 +121,7 @@ def bin_search(
         raise ValueError("Impossible number of changepoints. Please, increase max_value or increase n_bkps value.")
     lower_bound, upper_bound = 0.0, max_value
-    now_value = np.mean([lower_bound, upper_bound])
+    now_value = float(np.mean([lower_bound, upper_bound]))
     now_n_bkps = _get_n_bkps(series, change_point_model, **{opt_param: now_value})
     iters = 0

diff --git a/etna/transforms/__init__.py b/etna/transforms/__init__.py
index fb4b93896..44c536bac 100644
--- a/etna/transforms/__init__.py
+++ b/etna/transforms/__init__.py
@@ -12,6 +12,7 @@
 from etna.transforms.decomposition import ChangePointsSegmentationTransform
 from etna.transforms.decomposition import ChangePointsTrendTransform
 from etna.transforms.decomposition import DeseasonalityTransform
+from etna.transforms.decomposition import FourierDecomposeTransform
 from etna.transforms.decomposition import IrreversibleChangePointsTransform
 from etna.transforms.decomposition import LinearTrendTransform
 from etna.transforms.decomposition import ReversibleChangePointsTransform

diff --git a/etna/transforms/decomposition/__init__.py b/etna/transforms/decomposition/__init__.py
index a5516ec62..ae2558af6 100644
--- a/etna/transforms/decomposition/__init__.py
+++ b/etna/transforms/decomposition/__init__.py
@@ -16,4 +16,5 @@
 from etna.transforms.decomposition.deseasonal import DeseasonalityTransform
 from etna.transforms.decomposition.detrend import LinearTrendTransform
 from etna.transforms.decomposition.detrend import TheilSenTrendTransform
+from etna.transforms.decomposition.dft_based import FourierDecomposeTransform
 from etna.transforms.decomposition.stl import STLTransform
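The new module below ranks the non-negative frequency bins of `np.fft.fft` by amplitude, keeps the `k` strongest, and inverts each selected bin separately, so that every kept frequency becomes its own sinusoidal component. A minimal standalone sketch of that idea (the function name is illustrative; it assumes a plain real-valued `numpy` array):

```python
import numpy as np


def top_k_dft_components(series: np.ndarray, k: int) -> np.ndarray:
    """Split `series` into `k` sinusoidal components, one per selected frequency."""
    num_obs = len(series)
    num_pos_freqs = int(np.ceil((num_obs - 1) / 2) + 1)

    dft = np.fft.fft(series)

    # rank the non-negative frequencies by amplitude and keep the k strongest
    amplitudes = np.abs(dft)[:num_pos_freqs]
    top_k_idxs = np.argpartition(amplitudes, num_pos_freqs - k)[-k:]

    # put each selected coefficient into its own otherwise-zero spectrum ...
    freq_matrix = np.diag(dft)[:num_pos_freqs]
    selected = freq_matrix[top_k_idxs]

    # ... and bring every spectrum back to the time domain separately
    return np.fft.ifft(selected).real


# three sinusoidal components for a sine wave, one row per component
components = top_k_dft_components(np.sin(np.arange(100) / 5), k=3)
assert components.shape == (3, 100)
```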
diff --git a/etna/transforms/decomposition/dft_based.py b/etna/transforms/decomposition/dft_based.py
new file mode 100644
index 000000000..def3eeed8
--- /dev/null
+++ b/etna/transforms/decomposition/dft_based.py
@@ -0,0 +1,200 @@
+from typing import List
+
+import numpy as np
+import pandas as pd
+
+from etna.datasets import TSDataset
+from etna.datasets.utils import determine_num_steps
+from etna.transforms import IrreversibleTransform
+
+
+class FourierDecomposeTransform(IrreversibleTransform):
+    """Transform that uses Fourier transformation to estimate series decomposition.
+
+    Note
+    ----
+    This transform decomposes only in-sample data; for future timestamps it produces ``NaN``.
+    The dataset to be transformed must contain at least the minimum number of in-sample timestamps required by the transform.
+
+    Warning
+    -------
+    This transform adds new columns to the dataset that correspond to the selected frequencies. Such columns are named with
+    a ``dft_{i}`` suffix. The suffix index does NOT indicate any relation to the frequencies; produced names should be
+    thought of as arbitrary identifiers of the produced sinusoids.
+    """
+
+    def __init__(self, k: int, in_column: str = "target", residuals: bool = False):
+        """Init ``FourierDecomposeTransform``.
+
+        Parameters
+        ----------
+        k:
+            how many of the top positive frequencies are selected for the decomposition. Selection is based on the amplitudes.
+        in_column:
+            name of the processed column.
+        residuals:
+            whether to add residuals after decomposition. This guarantees that all components, including residuals, sum up to the series.
+        """
+        if k <= 0:
+            raise ValueError("Parameter `k` must be a positive integer!")
+
+        self.k = k
+        self.in_column = in_column
+        self.residuals = residuals
+
+        self._first_timestamp = None
+        self._last_timestamp = None
+
+        super().__init__(required_features=[in_column])
+
+    def get_regressors_info(self) -> List[str]:
+        """Return the list with regressors created by the transform."""
+        return []
+
+    def _fit(self, df: pd.DataFrame):
+        """Fit transform with the dataframe."""
+        pass
+
+    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Transform provided dataframe."""
+        pass
+
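+    # For a real-valued series of length n, the FFT bins with non-negative frequency are
+    # 0, ..., ceil((n - 1) / 2), i.e. ceil((n - 1) / 2) + 1 bins counting the DC bin and,
+    # for even n, the Nyquist bin: e.g. 3 bins for n=5 and 4 bins for n=6.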
+ """ + if k <= 0: + raise ValueError("Parameter `k` must be positive integer!") + + self.k = k + self.in_column = in_column + self.residuals = residuals + + self._first_timestamp = None + self._last_timestamp = None + + super().__init__(required_features=[in_column]) + + def get_regressors_info(self) -> List[str]: + """Return the list with regressors created by the transform.""" + return [] + + def _fit(self, df: pd.DataFrame): + """Fit transform with the dataframe.""" + pass + + def _transform(self, df: pd.DataFrame) -> pd.DataFrame: + """Transform provided dataframe.""" + pass + + @staticmethod + def _get_num_pos_freqs(series: pd.Series) -> int: + """Get number of positive frequencies for the series.""" + num_obs = len(series) + return int(np.ceil((num_obs - 1) / 2) + 1) + + def _check_segments(self, df: pd.DataFrame): + """Check if series satisfy conditions.""" + segments_with_missing = [] + min_num_pos_freq = float("inf") + for segment in df: + series = df[segment] + series = series.loc[series.first_valid_index() : series.last_valid_index()] + if series.isna().any(): + segments_with_missing.append(segment) + + min_num_pos_freq = min(min_num_pos_freq, self._get_num_pos_freqs(series)) + + if len(segments_with_missing) > 0: + raise ValueError( + f"Feature `{self.in_column}` contains missing values in segments: {segments_with_missing}!" + ) + + if self.k > min_num_pos_freq: + raise ValueError(f"Parameter `k` must not be greater then {min_num_pos_freq} for the provided dataset!") + + def _dft_components(self, series: pd.Series) -> pd.DataFrame: + """Estimate series decomposition using FFT.""" + initial_index = series.index + series = series.loc[series.first_valid_index() : series.last_valid_index()] + + num_pos_freqs = self._get_num_pos_freqs(series) + + # compute Fourier decomposition of the series + dft_series = np.fft.fft(series) + + # compute "amplitudes" for each frequency + abs_dft_series = np.abs(dft_series) + + # select top-k indices + abs_pos_dft_series = abs_dft_series[:num_pos_freqs] + top_k_idxs = np.argpartition(abs_pos_dft_series, num_pos_freqs - self.k)[-self.k :] + + # select top-k and separate each frequency + freq_matrix = np.diag(dft_series) + freq_matrix = freq_matrix[:num_pos_freqs] + selected_freqs = freq_matrix[top_k_idxs] + + # return frequencies to initial domain + components = np.fft.ifft(selected_freqs).real + + components_df = pd.DataFrame( + data=components.T, columns=[f"dft_{i}" for i in range(components.shape[0])], index=series.index + ) + + if self.residuals: + components_df["dft_residuals"] = series.values - np.sum(components, axis=0) + + # return trailing and leading nans to the series if any existed initially + if not components_df.index.equals(initial_index): + components_df = components_df.reindex(index=initial_index, fill_value=np.nan) + + return components_df + + def fit(self, ts: TSDataset) -> "FourierDecomposeTransform": + """Fit the transform and the decomposition model. + + Parameters + ---------- + ts: + dataset to fit the transform on. + + Returns + ------- + : + the fitted transform instance. + """ + self._first_timestamp = ts.index.min() + self._last_timestamp = ts.index.max() + + self._check_segments(df=ts[..., self.in_column].droplevel("feature", axis=1)) + + return self + + def transform(self, ts: TSDataset) -> TSDataset: + """Transform ``TSDataset`` inplace. + + Parameters + ---------- + ts: + Dataset to transform. + + Returns + ------- + : + Transformed ``TSDataset``. 
+ """ + if self._first_timestamp is None: + raise ValueError("Transform is not fitted!") + + if ts.index.min() < self._first_timestamp: + raise ValueError( + f"First index of the dataset to be transformed must be larger or equal than {self._first_timestamp}!" + ) + + if ts.index.min() > self._last_timestamp: + raise ValueError( + f"Dataset to be transformed must contain historical observations in range {self._first_timestamp} - {self._last_timestamp}" + ) + + segment_df = ts[..., self.in_column].droplevel("feature", axis=1) + + ts_max_timestamp = ts.index.max() + if ts_max_timestamp > self._last_timestamp: + future_steps = determine_num_steps(self._last_timestamp, ts_max_timestamp, freq=ts.freq) + segment_df.iloc[-future_steps:] = np.nan + + self._check_segments(df=segment_df) + + segments = segment_df.columns + segment_components = [] + for segment in segments: + components_df = self._dft_components(series=segment_df[segment]) + components_df.columns = f"{self.in_column}_" + components_df.columns + + components_df.columns = pd.MultiIndex.from_product( + [[segment], components_df.columns], names=["segment", "feature"] + ) + + segment_components.append(components_df) + + segment_components = pd.concat(segment_components, axis=1) + + ts.add_columns_from_pandas(segment_components) + + return ts + + +__all__ = ["FourierDecomposeTransform"] diff --git a/poetry.lock b/poetry.lock index 8b4dfd39a..bd2ee60ef 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1741,7 +1741,7 @@ files = [ name = "importlib-metadata" version = "6.6.0" description = "Read metadata from Python packages" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, @@ -2306,39 +2306,35 @@ typing = ["mypy (>=1.0.0)"] [[package]] name = "llvmlite" -version = "0.38.1" +version = "0.41.1" description = "lightweight wrapper around basic LLVM functionality" optional = false -python-versions = ">=3.7,<3.11" -files = [ - {file = "llvmlite-0.38.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a7dd2bd1d6406e7789273e3f8a304ed5d9adcfaa5768052fca7dc233a857be98"}, - {file = "llvmlite-0.38.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a5e0ed215a576f0f872f47a70b8cb49864e0aefc8586aff5ce83e3bff47bc23"}, - {file = "llvmlite-0.38.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:633c9026eb43b9903cc4ffbc1c7d5293b2e3ad95d06fa9eab0f6ce6ff6ea15b3"}, - {file = "llvmlite-0.38.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b98da8436dbc29013ea301f1fdb0d596ab53bf0ab65c976d96d00bb6faa0b479"}, - {file = "llvmlite-0.38.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0adce1793d66d009c554809f27baeb6258bf13f6fbaa12eff7443500caec25"}, - {file = "llvmlite-0.38.1-cp310-cp310-win32.whl", hash = "sha256:8c64c90a8b0b7b7e1ed1912ba82c1a3f43cf25affbe06aa3c56c84050edee8ac"}, - {file = "llvmlite-0.38.1-cp310-cp310-win_amd64.whl", hash = "sha256:ab070266f0f51304789a6c20d4be91a9e69683ad9bd4861eb89980e8eb613b3a"}, - {file = "llvmlite-0.38.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ed7528b8b85de930b76407e44b080e4f376b7a007c2879749599ff8e2fe32753"}, - {file = "llvmlite-0.38.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7db018da2863034ad9c73c946625637f3a89635bc70576068bab4bd085eea90d"}, - {file = "llvmlite-0.38.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4c1e5805c92e049b4956ed01204c6647de6160ab9aefb0d67ea83ca02a1d889a"}, - {file = "llvmlite-0.38.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5559e46c79b4017c3c25edc3b9512d11adc3689b9046120c685b0905c08d48a5"}, - {file = "llvmlite-0.38.1-cp37-cp37m-win32.whl", hash = "sha256:ef9aa574eff2e15f8c47b255da0db5dab326dc7f76384c307ae35490e2d2489a"}, - {file = "llvmlite-0.38.1-cp37-cp37m-win_amd64.whl", hash = "sha256:84d5a0163c172db2b2ae561d2fc0866fbd9f716cf13f92c0d41ca4338e682672"}, - {file = "llvmlite-0.38.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a263252a68d85450110ec1f2b406c0414e49b04a4d216d31c0515ea1d59c3882"}, - {file = "llvmlite-0.38.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:de8bd61480173930f2a029673e7cd0738fbbb5171dfe490340839ad7301d4cf0"}, - {file = "llvmlite-0.38.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fbfbe546394c39db39a6898a51972aa131c8d6b0628517728b350552f58bdc19"}, - {file = "llvmlite-0.38.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c4f26c6c370e134a909ac555a671fa1376e74c69af0208f25c0979472577a9d"}, - {file = "llvmlite-0.38.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f95f455697c44d7c04ef95fdfce04629f48df08a832d0a0d9eb2363186dbb969"}, - {file = "llvmlite-0.38.1-cp38-cp38-win32.whl", hash = "sha256:41e638a71c85a9a4a33f279c4cd812bc2f84122505b1f6ab8984ec7debb8548b"}, - {file = "llvmlite-0.38.1-cp38-cp38-win_amd64.whl", hash = "sha256:5c07d63df4578f31b39b764d3b4291f70157af7f42e171a8884ae7aaf989d1f7"}, - {file = "llvmlite-0.38.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4e11bd9929dcbd55d5eb5cd7b08bf71b0097ea48cc192b69d102a90dd6e9816f"}, - {file = "llvmlite-0.38.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:edfa2c761cfa56cf76e783290d82e117f829bb691d8d90aa375505204888abac"}, - {file = "llvmlite-0.38.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e609f7312a439b53b6f622d99180c3ff6a3e1e4ceca4d18aca1c5b46f4e3664"}, - {file = "llvmlite-0.38.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f53c3448410cc84d0e1af84dbc0d60ad32779853d40bcc8b1ee3c67ebbe94b1"}, - {file = "llvmlite-0.38.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8fac4edbadefa4dddf5dc6cca76bc2ae81df211dcd16a6638d60cc41249e56"}, - {file = "llvmlite-0.38.1-cp39-cp39-win32.whl", hash = "sha256:3d76c0fa42390bef56979ed213fbf0150c3fef36f5ea68d3d780d5d725da8c01"}, - {file = "llvmlite-0.38.1-cp39-cp39-win_amd64.whl", hash = "sha256:66462d768c30d5f648ca3361d657b434efa8b09f6cf04d6b6eae66e62e993644"}, - {file = "llvmlite-0.38.1.tar.gz", hash = "sha256:0622a86301fcf81cc50d7ed5b4bebe992c030580d413a8443b328ed4f4d82561"}, +python-versions = ">=3.8" +files = [ + {file = "llvmlite-0.41.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1e1029d47ee66d3a0c4d6088641882f75b93db82bd0e6178f7bd744ebce42b9"}, + {file = "llvmlite-0.41.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:150d0bc275a8ac664a705135e639178883293cf08c1a38de3bbaa2f693a0a867"}, + {file = "llvmlite-0.41.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1eee5cf17ec2b4198b509272cf300ee6577229d237c98cc6e63861b08463ddc6"}, + {file = "llvmlite-0.41.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dd0338da625346538f1173a17cabf21d1e315cf387ca21b294ff209d176e244"}, + {file = "llvmlite-0.41.1-cp310-cp310-win32.whl", hash = 
"sha256:fa1469901a2e100c17eb8fe2678e34bd4255a3576d1a543421356e9c14d6e2ae"}, + {file = "llvmlite-0.41.1-cp310-cp310-win_amd64.whl", hash = "sha256:2b76acee82ea0e9304be6be9d4b3840208d050ea0dcad75b1635fa06e949a0ae"}, + {file = "llvmlite-0.41.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:210e458723436b2469d61b54b453474e09e12a94453c97ea3fbb0742ba5a83d8"}, + {file = "llvmlite-0.41.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:855f280e781d49e0640aef4c4af586831ade8f1a6c4df483fb901cbe1a48d127"}, + {file = "llvmlite-0.41.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b67340c62c93a11fae482910dc29163a50dff3dfa88bc874872d28ee604a83be"}, + {file = "llvmlite-0.41.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2181bb63ef3c607e6403813421b46982c3ac6bfc1f11fa16a13eaafb46f578e6"}, + {file = "llvmlite-0.41.1-cp311-cp311-win_amd64.whl", hash = "sha256:9564c19b31a0434f01d2025b06b44c7ed422f51e719ab5d24ff03b7560066c9a"}, + {file = "llvmlite-0.41.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5940bc901fb0325970415dbede82c0b7f3e35c2d5fd1d5e0047134c2c46b3281"}, + {file = "llvmlite-0.41.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8b0a9a47c28f67a269bb62f6256e63cef28d3c5f13cbae4fab587c3ad506778b"}, + {file = "llvmlite-0.41.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8afdfa6da33f0b4226af8e64cfc2b28986e005528fbf944d0a24a72acfc9432"}, + {file = "llvmlite-0.41.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8454c1133ef701e8c050a59edd85d238ee18bb9a0eb95faf2fca8b909ee3c89a"}, + {file = "llvmlite-0.41.1-cp38-cp38-win32.whl", hash = "sha256:2d92c51e6e9394d503033ffe3292f5bef1566ab73029ec853861f60ad5c925d0"}, + {file = "llvmlite-0.41.1-cp38-cp38-win_amd64.whl", hash = "sha256:df75594e5a4702b032684d5481db3af990b69c249ccb1d32687b8501f0689432"}, + {file = "llvmlite-0.41.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:04725975e5b2af416d685ea0769f4ecc33f97be541e301054c9f741003085802"}, + {file = "llvmlite-0.41.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bf14aa0eb22b58c231243dccf7e7f42f7beec48970f2549b3a6acc737d1a4ba4"}, + {file = "llvmlite-0.41.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92c32356f669e036eb01016e883b22add883c60739bc1ebee3a1cc0249a50828"}, + {file = "llvmlite-0.41.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24091a6b31242bcdd56ae2dbea40007f462260bc9bdf947953acc39dffd54f8f"}, + {file = "llvmlite-0.41.1-cp39-cp39-win32.whl", hash = "sha256:880cb57ca49e862e1cd077104375b9d1dfdc0622596dfa22105f470d7bacb309"}, + {file = "llvmlite-0.41.1-cp39-cp39-win_amd64.whl", hash = "sha256:92f093986ab92e71c9ffe334c002f96defc7986efda18397d0f08534f3ebdc4d"}, + {file = "llvmlite-0.41.1.tar.gz", hash = "sha256:f19f767a018e6ec89608e1f6b13348fa2fcde657151137cb64e56d48598a92db"}, ] [[package]] @@ -3050,75 +3046,74 @@ test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync" [[package]] name = "numba" -version = "0.55.2" +version = "0.58.1" description = "compiling Python code using LLVM" optional = false -python-versions = ">=3.7,<3.11" -files = [ - {file = "numba-0.55.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:dd05f7c0ce64b6977596aa4e5a44747c6ef414d7989da1c7672337c54381a5ef"}, - {file = "numba-0.55.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e36232eccd172c583b1f021c5c48744c087ae6fc9dc5c5f0dd2cb2286e517bf8"}, - {file = 
"numba-0.55.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:25410557d0deb1d97397b71e142a36772133986a7dd4fe2935786e2dd149245f"}, - {file = "numba-0.55.2-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:676c081162cc9403706071c1d1d42e479c0741551ab28096ba13859a2e3e9b80"}, - {file = "numba-0.55.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2665ef28e900b3a55bf370daa81c12ebc64cd434116accd60c38a95a159a3182"}, - {file = "numba-0.55.2-cp310-cp310-win32.whl", hash = "sha256:d7ac9ea5feef9536ab8bfbbb3ded1a0617ea8794d7547800d535b7857800f996"}, - {file = "numba-0.55.2-cp310-cp310-win_amd64.whl", hash = "sha256:29b89a68af162acf87adeb8fbf01f6bb1effae4711b28146f95108d82e905624"}, - {file = "numba-0.55.2-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:6e0f9b5d1c8ea1bdef39b0ad921a9bbf0cc4a88e76d722d756c68f1653787c35"}, - {file = "numba-0.55.2-cp37-cp37m-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:135fb7694928f9f57b4ff5b1be58f20f4771fedd1680636a9affdead96051959"}, - {file = "numba-0.55.2-cp37-cp37m-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:de1f93bd7e2d431451aec20a52ac651a020e98a4ba46797fad860bba338a7e64"}, - {file = "numba-0.55.2-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3eaf53e73e700370163e58257257299ac0d46fea4f244bf5476e4635bc31d808"}, - {file = "numba-0.55.2-cp37-cp37m-win32.whl", hash = "sha256:da4485e0f0b9562f39c78887149b33d13d787aa696553c9257b95575122905ed"}, - {file = "numba-0.55.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5559c6684bf6cce7a22c656d8fef3e7c38ff5fec5153abef5955f6f7cae9f102"}, - {file = "numba-0.55.2-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a85779adc5234f7857615d1bd2c7b514314521f9f0163c33017707ed9816e6e6"}, - {file = "numba-0.55.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:16a52a0641c342b09b39f6762dcbe3846e44aa9baaaf4703b2ca42a3aee7346f"}, - {file = "numba-0.55.2-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:46715180f87d5a1f3e4077d207ade66c96fc01159f5b7d49cee2d6ffb9e6539f"}, - {file = "numba-0.55.2-cp38-cp38-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:d1c3cef3289fefb5673ceae32024ab5a8a08d4f4380bcb8348d01f1ba570ccff"}, - {file = "numba-0.55.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68bb33eaef1d6155fc1ae4fa6c915b8a42e5052c89a58742254eaad072eab118"}, - {file = "numba-0.55.2-cp38-cp38-win32.whl", hash = "sha256:dfddd633141608a09cbce275fb9fe7aa514918625ace20b0e587898a2d93c030"}, - {file = "numba-0.55.2-cp38-cp38-win_amd64.whl", hash = "sha256:a669212aa66ffee4ad778016ac3819add33f9bcb96b4c384d3099531dd175085"}, - {file = "numba-0.55.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:dcde1a1a3a430fb5f83c7e095b0b6ac7adb5595f50a3ee05babb2964f31613c4"}, - {file = "numba-0.55.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69b2e823efa40d32b259f5c094476dde2226b92032f17015d8cd7c10472654ce"}, - {file = "numba-0.55.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:20de0139d2267c8f0e2470d4f88540446cd1bf40de0f29f31b7ab9bf25d49b45"}, - {file = "numba-0.55.2-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:09ff4d690abb05ffbb8a29a96d1cf35b46887a26796d3670de104beeec73d639"}, - {file = "numba-0.55.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1105449247f338e49d63eb04a4aaa5c440bb5435df00f718c8e6e7afad841bb0"}, - {file = "numba-0.55.2-cp39-cp39-win32.whl", hash = 
"sha256:32649584144c35ced239937ab2c416ab22bbc1490ef8d90609c30fff9f6aa1b8"}, - {file = "numba-0.55.2-cp39-cp39-win_amd64.whl", hash = "sha256:8d5760a1e6a48d98d6b9cf774e4d2a64813d981cca60d7b7356af61195a6ca17"}, - {file = "numba-0.55.2.tar.gz", hash = "sha256:e428d9e11d9ba592849ccc9f7a009003eb7d30612007e365afe743ce7118c6f4"}, -] - -[package.dependencies] -llvmlite = ">=0.38.0rc1,<0.39" -numpy = ">=1.18,<1.23" -setuptools = "*" +python-versions = ">=3.8" +files = [ + {file = "numba-0.58.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:07f2fa7e7144aa6f275f27260e73ce0d808d3c62b30cff8906ad1dec12d87bbe"}, + {file = "numba-0.58.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7bf1ddd4f7b9c2306de0384bf3854cac3edd7b4d8dffae2ec1b925e4c436233f"}, + {file = "numba-0.58.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bc2d904d0319d7a5857bd65062340bed627f5bfe9ae4a495aef342f072880d50"}, + {file = "numba-0.58.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e79b6cc0d2bf064a955934a2e02bf676bc7995ab2db929dbbc62e4c16551be6"}, + {file = "numba-0.58.1-cp310-cp310-win_amd64.whl", hash = "sha256:81fe5b51532478149b5081311b0fd4206959174e660c372b94ed5364cfb37c82"}, + {file = "numba-0.58.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bcecd3fb9df36554b342140a4d77d938a549be635d64caf8bd9ef6c47a47f8aa"}, + {file = "numba-0.58.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1eaa744f518bbd60e1f7ccddfb8002b3d06bd865b94a5d7eac25028efe0e0ff"}, + {file = "numba-0.58.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bf68df9c307fb0aa81cacd33faccd6e419496fdc621e83f1efce35cdc5e79cac"}, + {file = "numba-0.58.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:55a01e1881120e86d54efdff1be08381886fe9f04fc3006af309c602a72bc44d"}, + {file = "numba-0.58.1-cp311-cp311-win_amd64.whl", hash = "sha256:811305d5dc40ae43c3ace5b192c670c358a89a4d2ae4f86d1665003798ea7a1a"}, + {file = "numba-0.58.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ea5bfcf7d641d351c6a80e8e1826eb4a145d619870016eeaf20bbd71ef5caa22"}, + {file = "numba-0.58.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e63d6aacaae1ba4ef3695f1c2122b30fa3d8ba039c8f517784668075856d79e2"}, + {file = "numba-0.58.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6fe7a9d8e3bd996fbe5eac0683227ccef26cba98dae6e5cee2c1894d4b9f16c1"}, + {file = "numba-0.58.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:898af055b03f09d33a587e9425500e5be84fc90cd2f80b3fb71c6a4a17a7e354"}, + {file = "numba-0.58.1-cp38-cp38-win_amd64.whl", hash = "sha256:d3e2fe81fe9a59fcd99cc572002101119059d64d31eb6324995ee8b0f144a306"}, + {file = "numba-0.58.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5c765aef472a9406a97ea9782116335ad4f9ef5c9f93fc05fd44aab0db486954"}, + {file = "numba-0.58.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e9356e943617f5e35a74bf56ff6e7cc83e6b1865d5e13cee535d79bf2cae954"}, + {file = "numba-0.58.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:240e7a1ae80eb6b14061dc91263b99dc8d6af9ea45d310751b780888097c1aaa"}, + {file = "numba-0.58.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:45698b995914003f890ad839cfc909eeb9c74921849c712a05405d1a79c50f68"}, + {file = "numba-0.58.1-cp39-cp39-win_amd64.whl", hash = "sha256:bd3dda77955be03ff366eebbfdb39919ce7c2620d86c906203bed92124989032"}, + {file = "numba-0.58.1.tar.gz", hash = 
"sha256:487ded0633efccd9ca3a46364b40006dbdaca0f95e99b8b83e778d1195ebcbaa"}, +] + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.9\""} +llvmlite = "==0.41.*" +numpy = ">=1.22,<1.27" [[package]] name = "numpy" -version = "1.22.4" -description = "NumPy is the fundamental package for array computing with Python." +version = "1.24.4" +description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.8" files = [ - {file = "numpy-1.22.4-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:ba9ead61dfb5d971d77b6c131a9dbee62294a932bf6a356e48c75ae684e635b3"}, - {file = "numpy-1.22.4-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:1ce7ab2053e36c0a71e7a13a7475bd3b1f54750b4b433adc96313e127b870887"}, - {file = "numpy-1.22.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7228ad13744f63575b3a972d7ee4fd61815b2879998e70930d4ccf9ec721dce0"}, - {file = "numpy-1.22.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43a8ca7391b626b4c4fe20aefe79fec683279e31e7c79716863b4b25021e0e74"}, - {file = "numpy-1.22.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a911e317e8c826ea632205e63ed8507e0dc877dcdc49744584dfc363df9ca08c"}, - {file = "numpy-1.22.4-cp310-cp310-win32.whl", hash = "sha256:9ce7df0abeabe7fbd8ccbf343dc0db72f68549856b863ae3dd580255d009648e"}, - {file = "numpy-1.22.4-cp310-cp310-win_amd64.whl", hash = "sha256:3e1ffa4748168e1cc8d3cde93f006fe92b5421396221a02f2274aab6ac83b077"}, - {file = "numpy-1.22.4-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:59d55e634968b8f77d3fd674a3cf0b96e85147cd6556ec64ade018f27e9479e1"}, - {file = "numpy-1.22.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c1d937820db6e43bec43e8d016b9b3165dcb42892ea9f106c70fb13d430ffe72"}, - {file = "numpy-1.22.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4c5d5eb2ec8da0b4f50c9a843393971f31f1d60be87e0fb0917a49133d257d6"}, - {file = "numpy-1.22.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64f56fc53a2d18b1924abd15745e30d82a5782b2cab3429aceecc6875bd5add0"}, - {file = "numpy-1.22.4-cp38-cp38-win32.whl", hash = "sha256:fb7a980c81dd932381f8228a426df8aeb70d59bbcda2af075b627bbc50207cba"}, - {file = "numpy-1.22.4-cp38-cp38-win_amd64.whl", hash = "sha256:e96d7f3096a36c8754207ab89d4b3282ba7b49ea140e4973591852c77d09eb76"}, - {file = "numpy-1.22.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:4c6036521f11a731ce0648f10c18ae66d7143865f19f7299943c985cdc95afb5"}, - {file = "numpy-1.22.4-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b89bf9b94b3d624e7bb480344e91f68c1c6c75f026ed6755955117de00917a7c"}, - {file = "numpy-1.22.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2d487e06ecbf1dc2f18e7efce82ded4f705f4bd0cd02677ffccfb39e5c284c7e"}, - {file = "numpy-1.22.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3eb268dbd5cfaffd9448113539e44e2dd1c5ca9ce25576f7c04a5453edc26fa"}, - {file = "numpy-1.22.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37431a77ceb9307c28382c9773da9f306435135fae6b80b62a11c53cfedd8802"}, - {file = "numpy-1.22.4-cp39-cp39-win32.whl", hash = "sha256:cc7f00008eb7d3f2489fca6f334ec19ca63e31371be28fd5dad955b16ec285bd"}, - {file = "numpy-1.22.4-cp39-cp39-win_amd64.whl", hash = "sha256:f0725df166cf4785c0bc4cbfb320203182b1ecd30fee6e541c8752a92df6aa32"}, - {file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"}, - {file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = 
"sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, ] [[package]] @@ -5856,4 +5851,4 @@ wandb = ["wandb"] [metadata] lock-version = "2.0" python-versions = ">=3.8.0, <3.11.0" -content-hash = "69c6c274909c072aa5efc1a8bd519dae2eecf68b34c242a284bd48d615a04efe" +content-hash = "b743c4a186808f1b1ccde63212570c9f8b925ba214c799fe8887c514071edb36" diff --git a/pyproject.toml b/pyproject.toml index 2d2e48231..938989dc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ boto3 = "^1.5" botocore = "*" scipy = "^1.0" Bottleneck = "^1.3.4" -numpy = "*" +numpy = "<2" joblib = "*" plotly = "*" hydra_slayer = "*" diff --git a/tests/conftest.py b/tests/conftest.py index dd81e2e3c..260afbef2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -908,3 +908,29 @@ def ts_with_binary_exog() -> TSDataset: df_exog = TSDataset.to_dataset(df_exog) ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all") return ts + + +@pytest.fixture() +def outliers_solid_tsds(): + """Create TSDataset with outliers and same last date.""" + timestamp = pd.date_range("2021-01-01", end="2021-02-20", freq="D") + target1 = [np.sin(i) for i in range(len(timestamp))] + target1[10] += 10 + + target2 = [np.sin(i) for i in range(len(timestamp))] + target2[8] += 8 + target2[15] = 2 + target2[26] -= 12 + + df1 = pd.DataFrame({"timestamp": timestamp, "target": target1, "segment": "1"}) + df2 = pd.DataFrame({"timestamp": timestamp, "target": target2, "segment": "2"}) + df = pd.concat([df1, df2], ignore_index=True) + df_exog = df.copy() + df_exog.columns = ["timestamp", "regressor_1", "segment"] + ts = TSDataset( + df=TSDataset.to_dataset(df).iloc[:-10], + df_exog=TSDataset.to_dataset(df_exog), + freq="D", + known_future="all", + ) + return ts diff --git a/tests/test_analysis/test_eda/test_plots.py b/tests/test_analysis/test_eda/test_plots.py index 6f1cd9ad0..d38a688e6 100644 --- a/tests/test_analysis/test_eda/test_plots.py +++ b/tests/test_analysis/test_eda/test_plots.py @@ -95,6 +95,7 @@ def test_cross_corr_normed(a, b, expected_result): np.testing.assert_almost_equal(result, expected_result) +@pytest.mark.filterwarnings("ignore: invalid value encountered in scalar divide") @pytest.mark.parametrize( "a, b, normed, expected_result", [ diff --git a/tests/test_transforms/test_decomposition/test_change_points_based/test_segmentation.py b/tests/test_transforms/test_decomposition/test_change_points_based/test_segmentation.py index 4f1045c46..eaa6fd76e 100644 --- a/tests/test_transforms/test_decomposition/test_change_points_based/test_segmentation.py +++ b/tests/test_transforms/test_decomposition/test_change_points_based/test_segmentation.py @@ -66,6 +66,8 @@ def test_transform_format_one_segment(pre_transformed_df: pd.DataFrame): assert transformed[OUT_COLUMN].dtype == "category" +# issue with assert 
on pandas==1.2 and pandas==1.3 +@pytest.mark.filterwarnings("ignore: invalid value encountered in cast") def test_monotonously_result(pre_transformed_df: pd.DataFrame): """Check that resulting column is monotonously non-decreasing.""" change_points_model = RupturesChangePointsModel(change_points_model=Binseg(), n_bkps=N_BKPS) @@ -100,6 +102,8 @@ def test_backtest(simple_ar_ts): _, _, _ = pipeline.backtest(ts=simple_ar_ts, metrics=[SMAPE()], n_folds=3) +# issue with assert on pandas==1.2 and pandas==1.3 +@pytest.mark.filterwarnings("ignore: invalid value encountered in cast") def test_future_and_past_filling(simple_ar_ts): change_points_model = RupturesChangePointsModel(change_points_model=Binseg(), n_bkps=N_BKPS) bs = ChangePointsSegmentationTransform( @@ -115,6 +119,8 @@ def test_future_and_past_filling(simple_ar_ts): assert (after.to_pandas()[seg][OUT_COLUMN].astype(int) == 5).all() +# issue with assert on pandas==1.2 and pandas==1.3 +@pytest.mark.filterwarnings("ignore: invalid value encountered in cast") def test_make_future(simple_ar_ts): change_points_model = RupturesChangePointsModel(change_points_model=Binseg(), n_bkps=N_BKPS) bs = ChangePointsSegmentationTransform( diff --git a/tests/test_transforms/test_decomposition/test_dft_based.py b/tests/test_transforms/test_decomposition/test_dft_based.py new file mode 100644 index 000000000..e773d02b5 --- /dev/null +++ b/tests/test_transforms/test_decomposition/test_dft_based.py @@ -0,0 +1,343 @@ +import numpy as np +import pandas as pd +import pytest + +from etna.datasets import TSDataset +from etna.datasets import generate_ar_df +from etna.metrics import MAE +from etna.models import CatBoostPerSegmentModel +from etna.models import HoltWintersModel +from etna.models import ProphetModel +from etna.pipeline import Pipeline +from etna.transforms import FourierDecomposeTransform +from etna.transforms import IForestOutlierTransform +from etna.transforms import TimeSeriesImputerTransform + + +def simple_pipeline_with_decompose(in_column, horizon, k): + pipeline = Pipeline( + transforms=[FourierDecomposeTransform(k=k, in_column=in_column)], + model=HoltWintersModel(), + horizon=horizon, + ) + return pipeline + + +@pytest.fixture() +def ts_with_exogs() -> TSDataset: + periods = 100 + periods_exog = periods + 10 + df = generate_ar_df(start_time="2020-01-01", periods=periods, freq="D", n_segments=2) + df_exog = generate_ar_df(start_time="2020-01-01", periods=periods_exog, freq="D", n_segments=2, random_seed=2) + df_exog.rename(columns={"target": "exog"}, inplace=True) + df_exog["holiday"] = np.random.choice([0, 1], size=periods_exog * 2) + + ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all") + return ts + + +@pytest.fixture() +def ts_with_exogs_train_test(ts_with_exogs): + return ts_with_exogs.train_test_split(test_size=20) + + +@pytest.fixture() +def forward_stride_datasets(ts_with_exogs): + train_df = ts_with_exogs.df.iloc[:-10] + test_df = ts_with_exogs.df.iloc[-20:] + + train_ts = TSDataset(df=train_df, freq=ts_with_exogs.freq) + test_ts = TSDataset(df=test_df, freq=ts_with_exogs.freq) + + return train_ts, test_ts + + +@pytest.fixture() +def ts_with_missing(ts_with_exogs): + target_df = ts_with_exogs[..., "target"] + target_df.iloc[10] = np.nan + + return TSDataset(df=target_df, freq=ts_with_exogs.freq) + + +@pytest.mark.parametrize("in_column", ("target", "feat")) +def test_init(in_column): + transform = FourierDecomposeTransform(k=5, in_column=in_column) + assert transform.required_features == [in_column] + assert 
transform._first_timestamp is None
+    assert transform._last_timestamp is None
+
+
+@pytest.mark.parametrize("k", (-1, 0))
+def test_invalid_k(k):
+    with pytest.raises(ValueError, match="Parameter `k` must be a positive integer!"):
+        FourierDecomposeTransform(k=k, in_column="target")
+
+
+@pytest.mark.parametrize(
+    "series, answ",
+    (
+        (pd.Series([1]), 1),
+        (pd.Series([1, 2]), 2),
+        (pd.Series([1, 2, 3]), 2),
+        (pd.Series([1, 2, 3, 4]), 3),
+        (pd.Series([1, 2, 3, 4, 5]), 3),
+        (pd.Series([1, 2, 3, 4, 5, 6]), 4),
+    ),
+)
+def test_get_num_pos_freqs(series, answ):
+    res = FourierDecomposeTransform._get_num_pos_freqs(series=series)
+    assert res == answ
+
+
+def test_check_segments_missing_values(ts_with_missing):
+    df = ts_with_missing[..., "target"]
+    transform = FourierDecomposeTransform(k=5)
+    with pytest.raises(ValueError, match="Feature `target` contains missing values"):
+        transform._check_segments(df=df)
+
+
+@pytest.mark.parametrize("k", (52, 100))
+def test_check_segments_large_k(ts_with_exogs, k):
+    df = ts_with_exogs[..., "target"]
+    transform = FourierDecomposeTransform(k=k)
+    with pytest.raises(ValueError, match="Parameter `k` must not be greater than"):
+        transform._check_segments(df=df)
+
+
+def test_check_segments_ok(ts_with_exogs):
+    df = ts_with_exogs[..., "target"]
+    transform = FourierDecomposeTransform(k=5)
+    transform._check_segments(df=df)
+
+
+@pytest.mark.parametrize(
+    "series",
+    (
+        pd.Series(np.arange(5)),
+        pd.Series(np.arange(10)),
+        pd.Series([np.nan] * 2 + list(range(5)) + [np.nan] * 3),
+    ),
+)
+def test_fft_components_out_format(series):
+    expected_columns = ["dft_0", "dft_1", "dft_2", "dft_residuals"]
+    transform = FourierDecomposeTransform(k=3, residuals=True)
+
+    decompose_df = transform._dft_components(series=series)
+
+    assert isinstance(decompose_df, pd.DataFrame)
+    pd.testing.assert_index_equal(decompose_df.index, series.index)
+    assert (decompose_df.columns == expected_columns).all()
+    np.testing.assert_allclose(np.sum(decompose_df.values, axis=1), series.values)
+
+
+def test_is_not_fitted(simple_tsdf):
+    transform = FourierDecomposeTransform(k=5, in_column="feat")
+    with pytest.raises(ValueError, match="Transform is not fitted!"):
+        transform.transform(ts=simple_tsdf)
+
+
+@pytest.mark.parametrize(
+    "ts_name,in_column",
+    (
+        ("outliers_df_with_two_columns", "target"),
+        ("outliers_df_with_two_columns", "feature"),
+        ("ts_with_exogs", "target"),
+        ("ts_with_exogs", "exog"),
+        ("ts_with_exogs", "holiday"),
+        ("example_tsds_int_timestamp", "target"),
+    ),
+)
+def test_fit(ts_name, in_column, request):
+    ts = request.getfixturevalue(ts_name)
+    transform = FourierDecomposeTransform(k=5, in_column=in_column)
+    transform.fit(ts=ts)
+
+    assert transform._first_timestamp == ts.index.min()
+    assert transform._last_timestamp == ts.index.max()
+
+
+@pytest.mark.parametrize("residuals", (True, False))
+@pytest.mark.parametrize("in_column", ("target", "exog"))
+def test_add_residuals(ts_with_exogs, residuals, in_column):
+    ts = ts_with_exogs
+
+    transform = FourierDecomposeTransform(k=5, in_column=in_column, residuals=residuals)
+    transformed = transform.fit_transform(ts=ts)
+
+    assert (f"{in_column}_dft_residuals" in transformed.features) is residuals
+
+
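+# `ts_with_exogs_train_test` returns `(train, test)`; the unpacking below is deliberately
+# reversed: the transform is fitted on the later chunk and then applied to a dataset that
+# starts before `_first_timestamp`, which must raise.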
+def test_timestamp_from_history(ts_with_exogs_train_test):
+    test, train = ts_with_exogs_train_test
+    transform = FourierDecomposeTransform(k=5)
+    transform.fit_transform(train)
+
+    with pytest.raises(ValueError, match="First index of the dataset to be transformed must be larger"):
+        transform.transform(test)
+
+
+def test_timestamp_from_future(ts_with_exogs_train_test):
+    train, test = ts_with_exogs_train_test
+    transform = FourierDecomposeTransform(k=5)
+    transform.fit_transform(train)
+
+    with pytest.raises(ValueError, match="Dataset to be transformed must contain historical observations in range"):
+        transform.transform(test)
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_forecast(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)
+
+    pipeline.fit(ts=ts)
+    forecast = pipeline.forecast()
+
+    assert forecast.size()[0] == horizon
+    assert np.sum(forecast[..., "target"].isna().sum()) == 0
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_predict(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)
+
+    pipeline.fit(ts=ts)
+    forecast = pipeline.predict(ts)
+
+    assert forecast.size()[0] == ts.size()[0]
+    assert np.sum(forecast[..., "target"].isna().sum()) == 0
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_predict_components(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)
+
+    pipeline.fit(ts=ts)
+    forecast = pipeline.predict(ts, return_components=True)
+
+    assert forecast.size()[0] == ts.size()[0]
+    assert forecast.target_components_names == ("target_component_level",)
+
+
+@pytest.mark.parametrize(
+    "in_column",
+    (
+        "target",
+        "holiday",
+        "exog",
+    ),
+)
+@pytest.mark.parametrize("horizon", (1, 5))
+def test_simple_pipeline_backtest(ts_with_exogs, in_column, horizon):
+    ts = ts_with_exogs
+
+    pipeline = simple_pipeline_with_decompose(in_column=in_column, horizon=horizon, k=5)
+
+    _, forecast, _ = pipeline.backtest(ts=ts, metrics=[MAE()], n_folds=3)
+
+    assert len(forecast) == horizon * 3
+    assert np.sum(forecast.loc[:, pd.IndexSlice[:, "target"]].isna().sum()) == 0
+
+
+@pytest.mark.parametrize(
+    "ts_name,in_column",
+    (
+        ("outliers_df_with_two_columns", "target"),
+        ("outliers_df_with_two_columns", "feature"),
+        ("ts_with_exogs", "target"),
+        ("ts_with_exogs", "exog"),
+    ),
+)
+@pytest.mark.parametrize("k", (1, 5, 10, 40, 51))
+@pytest.mark.parametrize("forecast_model", (ProphetModel(), CatBoostPerSegmentModel(iterations=10)))
+def test_pipeline_parameter_k(ts_name, in_column, forecast_model, k, request):
+    ts = request.getfixturevalue(ts_name)
+
+    pipeline = Pipeline(
+        transforms=[FourierDecomposeTransform(k=k, in_column=in_column)],
+        model=forecast_model,
+        horizon=3,
+    )
+
+    pipeline.fit(ts)
+    forecast = pipeline.forecast()
+
+    assert forecast.size()[0] == 3
+    assert np.sum(forecast.loc[:, pd.IndexSlice[:, "target"]].isna().sum()) == 0
+
+
+@pytest.mark.parametrize("answer", ({"1": ["2021-01-11"], "2": ["2021-01-09"]},))
+def test_outlier_detection(outliers_solid_tsds, answer):
+    ts = outliers_solid_tsds
+
+    transforms = [
+        FourierDecomposeTransform(k=2, in_column="target", residuals=True),
+        IForestOutlierTransform(
+            in_column="target",
+            features_to_ignore=["target", "regressor_1"],
+            contamination=0.01,
+        ),
+    ]
ts.fit_transform(transforms) + + for segment in ts.segments: + empty_values = pd.isna(ts[:, segment, "target"]) + assert empty_values.sum() == len(answer[segment]) + assert all(empty_values[answer[segment]]) + + +def test_outlier_detection_pipeline(outliers_solid_tsds): + ts = outliers_solid_tsds + pipeline = Pipeline( + transforms=[ + FourierDecomposeTransform(k=5, in_column="target"), + IForestOutlierTransform(in_column="target"), + TimeSeriesImputerTransform(in_column="target"), + ], + model=ProphetModel(), + horizon=3, + ) + pipeline.fit(ts) + + +@pytest.mark.parametrize("k", (1, 5)) +def test_stride_transform(forward_stride_datasets, k): + train, test = forward_stride_datasets + + transform = FourierDecomposeTransform(k=k, residuals=True) + + transform.fit(train) + transformed = transform.transform(test) + + assert not transformed.df.iloc[:10].isna().any().any() + assert transformed.df.iloc[10:].isna().all().any() diff --git a/tests/test_transforms/test_inference/test_inverse_transform.py b/tests/test_transforms/test_inference/test_inverse_transform.py index 34f43fcc2..63f8aa2bc 100644 --- a/tests/test_transforms/test_inference/test_inverse_transform.py +++ b/tests/test_transforms/test_inference/test_inverse_transform.py @@ -23,6 +23,7 @@ from etna.transforms import EventTransform from etna.transforms import ExogShiftTransform from etna.transforms import FilterFeaturesTransform +from etna.transforms import FourierDecomposeTransform from etna.transforms import FourierTransform from etna.transforms import GaleShapleyFeatureSelectionTransform from etna.transforms import HolidayTransform @@ -142,6 +143,7 @@ def _test_inverse_transform_train(self, ts, transform, expected_changes): "regular_ts", {}, ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), # embeddings ( EmbeddingSegmentTransform( @@ -604,6 +606,7 @@ def test_inverse_transform_train_fail_resample(self, transform, dataset_name, ex "regular_ts", {}, ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), # embeddings ( EmbeddingSegmentTransform( @@ -1096,6 +1099,7 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments) ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts"), # embeddings ( EmbeddingSegmentTransform( @@ -1382,6 +1386,8 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts"), + (FourierDecomposeTransform(in_column="positive", k=5, residuals=True), "ts_with_exog"), # embeddings ( EmbeddingSegmentTransform( @@ -2996,6 +3002,19 @@ def test_inverse_transform_future_with_target_fail_difference( with pytest.raises(ValueError, match="Test should go after the train without gaps"): self._test_inverse_transform_future_with_target(ts, transform, expected_changes=expected_changes) + @pytest.mark.parametrize( + "transform, dataset_name, expected_changes", + [ + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), + ], + ) + def test_inverse_transform_future_with_target_fail_require_history( + self, transform, dataset_name, expected_changes, request + ): + ts = request.getfixturevalue(dataset_name) + with pytest.raises(ValueError, match="Dataset to be transformed must contain historical observations"): + self._test_inverse_transform_future_with_target(ts, transform, expected_changes=expected_changes) + # It is the 
only transform that doesn't change values back during `inverse_transform` @to_be_fixed(raises=AssertionError) @pytest.mark.parametrize( @@ -3106,6 +3125,8 @@ def _test_inverse_transform_future_without_target( "regular_ts", {}, ), + (FourierDecomposeTransform(in_column="target", k=5, residuals=True), "regular_ts", {}), + (FourierDecomposeTransform(in_column="positive", k=5, residuals=True), "ts_with_exog", {}), # embeddings ( EmbeddingSegmentTransform( diff --git a/tests/test_transforms/test_inference/test_transform.py b/tests/test_transforms/test_inference/test_transform.py index 3f1073a73..9d6cc7866 100644 --- a/tests/test_transforms/test_inference/test_transform.py +++ b/tests/test_transforms/test_inference/test_transform.py @@ -23,6 +23,7 @@ from etna.transforms import EventTransform from etna.transforms import ExogShiftTransform from etna.transforms import FilterFeaturesTransform +from etna.transforms import FourierDecomposeTransform from etna.transforms import FourierTransform from etna.transforms import GaleShapleyFeatureSelectionTransform from etna.transforms import HolidayTransform @@ -128,6 +129,11 @@ def _test_transform_train(self, ts, transform, expected_changes): "regular_ts", {"create": {"res"}}, ), + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), # embeddings ( EmbeddingSegmentTransform( @@ -550,6 +556,11 @@ def test_transform_train_datetime_timestamp(self, transform, dataset_name, expec "regular_ts", {"create": {"res"}}, ), + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), # embeddings ( EmbeddingSegmentTransform( @@ -1060,6 +1071,7 @@ def _test_transform_train_subset_segments(self, ts, transform, segments): ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=2, residuals=True), "regular_ts"), # embeddings ( EmbeddingSegmentTransform( @@ -1328,6 +1340,8 @@ def _test_transform_future_subset_segments(self, ts, transform, segments, horizo ), "regular_ts", ), + (FourierDecomposeTransform(in_column="target", k=2, residuals=True), "regular_ts"), + (FourierDecomposeTransform(in_column="positive", k=2, residuals=True), "ts_with_exog"), # embeddings ( EmbeddingSegmentTransform( @@ -2816,6 +2830,23 @@ def test_transform_future_with_target(self, transform, dataset_name, expected_ch ts = request.getfixturevalue(dataset_name) self._test_transform_future_with_target(ts, transform, expected_changes=expected_changes) + @pytest.mark.parametrize( + "transform, dataset_name, expected_changes", + ( + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), + ), + ) + def test_transform_future_with_target_fail_require_history( + self, transform, dataset_name, expected_changes, request + ): + ts = request.getfixturevalue(dataset_name) + with pytest.raises(ValueError, match="Dataset to be transformed must contain historical observations"): + self._test_transform_future_with_target(ts, transform, expected_changes=expected_changes) + class TestTransformFutureWithoutTarget: """Test transform on future dataset with unknown target. 
@@ -2889,6 +2920,16 @@ def _test_transform_future_without_target(self, ts, transform, expected_changes, "regular_ts", {"create": {"res"}}, ), + ( + FourierDecomposeTransform(in_column="target", k=2, residuals=True), + "regular_ts", + {"create": {"target_dft_0", "target_dft_1", "target_dft_residuals"}}, + ), + ( + FourierDecomposeTransform(in_column="positive", k=2, residuals=True), + "ts_with_exog", + {"create": {"positive_dft_0", "positive_dft_1", "positive_dft_residuals"}}, + ), # embeddings ( EmbeddingSegmentTransform( diff --git a/tests/test_transforms/test_outliers/test_outliers_transform.py b/tests/test_transforms/test_outliers/test_outliers_transform.py index 87ff27ecb..daad6fc8d 100644 --- a/tests/test_transforms/test_outliers/test_outliers_transform.py +++ b/tests/test_transforms/test_outliers/test_outliers_transform.py @@ -78,32 +78,6 @@ def compare_outputs(ts, in_column, method, transform_constructor, method_kwargs, assert np.all(transformed_column[transformed_column.isna()].index == nan_timestamps) -@pytest.fixture() -def outliers_solid_tsds(): - """Create TSDataset with outliers and same last date.""" - timestamp = pd.date_range("2021-01-01", end="2021-02-20", freq="D") - target1 = [np.sin(i) for i in range(len(timestamp))] - target1[10] += 10 - - target2 = [np.sin(i) for i in range(len(timestamp))] - target2[8] += 8 - target2[15] = 2 - target2[26] -= 12 - - df1 = pd.DataFrame({"timestamp": timestamp, "target": target1, "segment": "1"}) - df2 = pd.DataFrame({"timestamp": timestamp, "target": target2, "segment": "2"}) - df = pd.concat([df1, df2], ignore_index=True) - df_exog = df.copy() - df_exog.columns = ["timestamp", "regressor_1", "segment"] - ts = TSDataset( - df=TSDataset.to_dataset(df).iloc[:-10], - df_exog=TSDataset.to_dataset(df_exog), - freq="D", - known_future="all", - ) - return ts - - @pytest.fixture() def outliers_solid_tsds_with_holidays(outliers_solid_tsds): """Create TSDataset with outliers with holidays"""
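For reference, the composition exercised by `test_outlier_detection_pipeline` above reads as follows from the user side (a sketch; the sizes, `k`, and `horizon` are arbitrary):

```python
from etna.datasets import TSDataset
from etna.datasets import generate_ar_df
from etna.models import ProphetModel
from etna.pipeline import Pipeline
from etna.transforms import FourierDecomposeTransform
from etna.transforms import IForestOutlierTransform
from etna.transforms import TimeSeriesImputerTransform

df = generate_ar_df(start_time="2021-01-01", periods=120, freq="D", n_segments=2)
ts = TSDataset(df, freq="D")

# decompose -> flag outliers (the detector sees the DFT components) -> impute them
pipeline = Pipeline(
    transforms=[
        FourierDecomposeTransform(k=5, in_column="target"),
        IForestOutlierTransform(in_column="target"),
        TimeSeriesImputerTransform(in_column="target"),
    ],
    model=ProphetModel(),
    horizon=3,
)
pipeline.fit(ts)
forecast = pipeline.forecast()
```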