Skip to content

Commit

Permalink
Issue Add MADOutlierTransform (#415)
Browse files Browse the repository at this point in the history
* init

* init

* new tests

* lack clear code

* lack clear code

* lack clear code

* full tests

* fixes

* fixes iloc indexes

* fix

* fix __all__

* fix renaming

* fix renaming

* add new test
  • Loading branch information
Polzovat123 authored Jul 4, 2024
1 parent 6863aab commit 1bd32a7
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `num_workers` parameter to `TS2VecEmbeddingModel` ([#396](https://github.com/etna-team/etna/pull/396))
- Add `get_anomalies_mad` function for anomaly detection ([#398](https://github.com/etna-team/etna/pull/398))
- Add `TSDataset.features` property to get list of all features in a dataset ([#405](https://github.com/etna-team/etna/pull/405))
- Add `MADOutlierTransform` class for anomaly detection ([#415](https://github.com/etna-team/etna/pull/415))
-

### Changed
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_reference/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ Transforms to detect outliers:
PredictionIntervalOutliersTransform
IForestOutlierTransform
IQROutlierTransform
MADOutlierTransform

Transforms to work with time-related features:

Expand Down
2 changes: 1 addition & 1 deletion etna/analysis/outliers/rolling_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def mad_method(
if mad_scale <= 0:
raise ValueError("Scaling parameter must be positive!")

window = series[indices]
window = series.iloc[indices]

if trend or seasonality:
if stl_params is None:
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from etna.transforms.outliers import DensityOutliersTransform
from etna.transforms.outliers import IForestOutlierTransform
from etna.transforms.outliers import IQROutlierTransform
from etna.transforms.outliers import MADOutlierTransform
from etna.transforms.outliers import MedianOutliersTransform
from etna.transforms.outliers import PredictionIntervalOutliersTransform
from etna.transforms.timestamp import DateFlagsTransform
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/outliers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
from etna.transforms.outliers.point_outliers import DensityOutliersTransform
from etna.transforms.outliers.point_outliers import IForestOutlierTransform
from etna.transforms.outliers.point_outliers import IQROutlierTransform
from etna.transforms.outliers.point_outliers import MADOutlierTransform
from etna.transforms.outliers.point_outliers import MedianOutliersTransform
from etna.transforms.outliers.point_outliers import PredictionIntervalOutliersTransform
93 changes: 93 additions & 0 deletions etna/transforms/outliers/point_outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from etna.analysis import get_anomalies_density
from etna.analysis import get_anomalies_iqr
from etna.analysis import get_anomalies_isolation_forest
from etna.analysis import get_anomalies_mad
from etna.analysis import get_anomalies_median
from etna.analysis import get_anomalies_prediction_interval
from etna.datasets import TSDataset
Expand Down Expand Up @@ -489,10 +490,102 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]:
}


class MADOutlierTransform(OutliersTransform):
    """Outlier transform backed by :py:func:`~etna.analysis.outliers.rolling_statistics.get_anomalies_mad`."""

    def __init__(
        self,
        in_column: str = "target",
        ignore_flag_column: Optional[str] = None,
        window_size: int = 10,
        stride: int = 1,
        mad_scale: float = 3,
        trend: bool = False,
        seasonality: bool = False,
        period: Optional[int] = None,
        stl_params: Optional[Dict[str, Any]] = None,
    ):
        """Initialize ``MADOutlierTransform``.

        Parameters
        ----------
        in_column:
            Name of the column that is searched for anomalies
        ignore_flag_column:
            Name of the column that marks values to be skipped by the outlier check
        window_size:
            Number of points in each window
        stride:
            Offset between neighboring windows
        mad_scale:
            Scaling parameter of the estimated interval
        trend:
            Whether to remove trend from the series
        seasonality:
            Whether to remove seasonality from the series
        period:
            Periodicity of the sequence for STL
        stl_params:
            Other parameters for STL. See :py:class:`statsmodels.tsa.seasonal.STL`
        """
        # Rolling-window configuration.
        self.window_size, self.stride = window_size, stride
        self.mad_scale = mad_scale
        # Optional decomposition applied before the check.
        self.trend, self.seasonality = trend, seasonality
        self.period, self.stl_params = period, stl_params
        # The column arguments are handled by the parent class.
        super().__init__(in_column=in_column, ignore_flag_column=ignore_flag_column)

    def detect_outliers(self, ts: TSDataset) -> Dict[str, pd.Series]:
        """Run :py:func:`~etna.analysis.outliers.rolling_statistics.get_anomalies_mad` with the stored settings.

        Parameters
        ----------
        ts:
            Dataset to process

        Returns
        -------
        :
            Outliers per segment, keyed by segment name. ``index_only=False`` is passed to the detector,
            so each entry is expected to be a series rather than a bare list of timestamps
            (NOTE(review): confirm against ``get_anomalies_mad``).
        """
        # Everything configured in ``__init__`` is forwarded unchanged.
        detector_kwargs: Dict[str, Any] = {
            "window_size": self.window_size,
            "stride": self.stride,
            "mad_scale": self.mad_scale,
            "trend": self.trend,
            "seasonality": self.seasonality,
            "period": self.period,
            "stl_params": self.stl_params,
        }
        return get_anomalies_mad(ts=ts, in_column=self.in_column, index_only=False, **detector_kwargs)

    def params_to_tune(self) -> Dict[str, BaseDistribution]:
        """Return the default hyperparameter search grid.

        The grid covers ``mad_scale``, ``trend`` and ``seasonality``.
        All remaining parameters are expected to be set by the user.

        Returns
        -------
        :
            Grid to tune.
        """
        grid: Dict[str, BaseDistribution] = {"mad_scale": FloatDistribution(low=0.5, high=10)}
        # Both decomposition switches are plain on/off choices.
        for flag_name in ("trend", "seasonality"):
            grid[flag_name] = CategoricalDistribution([True, False])
        return grid


# Names exported by this module when it is star-imported.
__all__ = [
"MedianOutliersTransform",
"DensityOutliersTransform",
"PredictionIntervalOutliersTransform",
"IForestOutlierTransform",
"IQROutlierTransform",
"MADOutlierTransform",
]
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from etna.transforms import LimitTransform
from etna.transforms import LinearTrendTransform
from etna.transforms import LogTransform
from etna.transforms import MADOutlierTransform
from etna.transforms import MADTransform
from etna.transforms import MaxAbsScalerTransform
from etna.transforms import MaxTransform
Expand Down Expand Up @@ -467,6 +468,7 @@ def _test_inverse_transform_train(self, ts, transform, expected_changes):
),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -915,6 +917,7 @@ def test_inverse_transform_train_fail_resample(self, transform, dataset_name, ex
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res", in_column="external_timestamp"),
Expand Down Expand Up @@ -1244,6 +1247,7 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments)
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -1551,6 +1555,7 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -2003,6 +2008,7 @@ def test_inverse_transform_train_new_segments(self, transform, dataset_name, exp
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -2448,6 +2454,7 @@ def test_inverse_transform_future_new_segments(self, transform, dataset_name, ex
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -2893,6 +2900,7 @@ def _test_inverse_transform_future_with_target(
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -3421,6 +3429,7 @@ def _test_inverse_transform_future_without_target(
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down
9 changes: 9 additions & 0 deletions tests/test_transforms/test_inference/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from etna.transforms import LimitTransform
from etna.transforms import LinearTrendTransform
from etna.transforms import LogTransform
from etna.transforms import MADOutlierTransform
from etna.transforms import MADTransform
from etna.transforms import MaxAbsScalerTransform
from etna.transforms import MaxTransform
Expand Down Expand Up @@ -430,6 +431,7 @@ def _test_transform_train(self, ts, transform, expected_changes):
),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -871,6 +873,7 @@ def test_transform_train_datetime_timestamp(self, transform, dataset_name, expec
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res", in_column="external_timestamp"),
Expand Down Expand Up @@ -1195,6 +1198,7 @@ def _test_transform_train_subset_segments(self, ts, transform, segments):
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -1489,6 +1493,7 @@ def _test_transform_future_subset_segments(self, ts, transform, segments, horizo
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -1906,6 +1911,7 @@ def test_transform_train_new_segments(self, transform, dataset_name, expected_ch
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(SpecialDaysTransform(in_column="external_timestamp"), "ts_with_external_timestamp"),
Expand Down Expand Up @@ -2341,6 +2347,7 @@ def test_transform_future_new_segments(self, transform, dataset_name, expected_c
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
(MADOutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -2702,6 +2709,7 @@ def _test_transform_future_with_target(self, ts, transform, expected_changes, ga
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -3200,6 +3208,7 @@ def _test_transform_future_without_target(self, ts, transform, expected_changes,
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
(MADOutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down
Loading

0 comments on commit 1bd32a7

Please sign in to comment.