From e1c8d34b43f11d51e9b3fa34780ea8394fafb277 Mon Sep 17 00:00:00 2001 From: Boyd Biersteker <108391625+Beerstabr@users.noreply.github.com> Date: Fri, 10 Feb 2023 16:32:00 +0100 Subject: [PATCH] Improvement/statsforecastets: make sf_ets probabilistic + add future_covariate support for sf_ets + add AutoTheta (#1476) * StatsForecastETS now is probabilistic in the same way as StatsForecastAutoARIMA * include future covariates in sf_ets * sf_ets with future_covariates works.. probably it is underestimating the uncertainty because it doesn't take into account the uncertainty of the coef esimation of the OLS * Create separate file for StatsForecast models and extract some functions. * Added AutoTheta from the StatsForecast package. * Deleted sf_auto_arima.py and sf_ets.py, because the code is now included in sf_models.py. * Update darts/models/forecasting/sf_models.py Co-authored-by: Julien Herzen * Update darts/models/forecasting/sf_models.py Co-authored-by: Julien Herzen * Moved all statsforecast models to their own .py file. Added some comments explaining the handling of future covariates by StatsForecastETS. Included StatsForecastTheta in the tests. Moved the utility functions that the statsforecast models share to a singly .py file. Added the CES model which is supposed to be probabilistic, but that doesn't work yet eventhough it is supposed to be included in statsforecast 1.4.0. Trying to figure out why it isn't working. Removed sf_models.py. * Beginning of test for fit on residuals for statsforecast ets. * - AutoCES not probablisitc anymore, because that is not yet released in statsforecast 1.4.0 - changed AutoETS to SFAutoETS - added models to the base tests - wrote two units tests for future covariates use for sf_ets * - AutoCES not probablisitc anymore, because that is not yet released in statsforecast 1.4.0 - changed AutoETS to SFAutoETS - added models to the base tests - wrote two units tests for future covariates use for sf_ets * Changed StatsForecastETS to StatsForecastAutoETS. --------- Co-authored-by: Julien Herzen Co-authored-by: Julien Herzen --- darts/models/__init__.py | 11 ++- .../models/components/statsforecast_utils.py | 30 ++++++ darts/models/forecasting/sf_auto_arima.py | 16 ++-- darts/models/forecasting/sf_auto_ces.py | 80 ++++++++++++++++ .../forecasting/{sf_ets.py => sf_auto_ets.py} | 68 +++++++++++--- darts/models/forecasting/sf_auto_theta.py | 93 +++++++++++++++++++ .../test_local_forecasting_models.py | 10 +- .../models/forecasting/test_sf_auto_ets.py | 63 +++++++++++++ requirements/core.txt | 2 +- 9 files changed, 346 insertions(+), 27 deletions(-) create mode 100644 darts/models/components/statsforecast_utils.py create mode 100644 darts/models/forecasting/sf_auto_ces.py rename darts/models/forecasting/{sf_ets.py => sf_auto_ets.py} (60%) create mode 100644 darts/models/forecasting/sf_auto_theta.py create mode 100644 darts/tests/models/forecasting/test_sf_auto_ets.py diff --git a/darts/models/__init__.py b/darts/models/__init__.py index cc982fe94b..60ac60b572 100644 --- a/darts/models/__init__.py +++ b/darts/models/__init__.py @@ -90,12 +90,15 @@ class NotImportedCatBoostModel: try: from darts.models.forecasting.croston import Croston from darts.models.forecasting.sf_auto_arima import StatsForecastAutoARIMA - from darts.models.forecasting.sf_ets import StatsForecastETS + from darts.models.forecasting.sf_auto_ces import StatsForecastAutoCES + from darts.models.forecasting.sf_auto_ets import StatsForecastAutoETS + from darts.models.forecasting.sf_auto_theta import StatsForecastAutoTheta + except ImportError: logger.warning( "The statsforecast module could not be imported. " "To enable support for the StatsForecastAutoARIMA, " - "StatsForecastETS and Croston models, please consider " + "StatsForecastAutoETS and Croston models, please consider " "installing it." ) @@ -104,10 +107,10 @@ class NotImportedStatsForecastAutoARIMA: StatsForecastAutoARIMA = NotImportedStatsForecastAutoARIMA() - class NotImportedStatsForecastETS: + class NotImportedStatsForecastAutoETS: usable = False - StatsForecastETS = NotImportedStatsForecastETS() + StatsForecastAutoETS = NotImportedStatsForecastAutoETS() class NotImportedCroston: usable = False diff --git a/darts/models/components/statsforecast_utils.py b/darts/models/components/statsforecast_utils.py new file mode 100644 index 0000000000..9659ec86a3 --- /dev/null +++ b/darts/models/components/statsforecast_utils.py @@ -0,0 +1,30 @@ +""" +StatsForecast utils +----------- +""" + +import numpy as np + +# In a normal distribution, 68.27 percentage of values lie within one standard deviation of the mean +one_sigma_rule = 68.27 + + +def create_normal_samples( + mu: float, + std: float, + num_samples: int, + n: int, +) -> np.array: + """Generate samples assuming a Normal distribution.""" + samples = np.random.normal(loc=mu, scale=std, size=(num_samples, n)).T + samples = np.expand_dims(samples, axis=1) + return samples + + +def unpack_sf_dict( + forecast_dict: dict, +): + """Unpack the dictionary that is returned by the StatsForecast 'predict()' method.""" + mu = forecast_dict["mean"] + std = forecast_dict[f"hi-{one_sigma_rule}"] - mu + return mu, std diff --git a/darts/models/forecasting/sf_auto_arima.py b/darts/models/forecasting/sf_auto_arima.py index 66e53876a2..a6a048926a 100644 --- a/darts/models/forecasting/sf_auto_arima.py +++ b/darts/models/forecasting/sf_auto_arima.py @@ -5,10 +5,14 @@ from typing import Optional -import numpy as np from statsforecast.models import AutoARIMA as SFAutoARIMA from darts import TimeSeries +from darts.models.components.statsforecast_utils import ( + create_normal_samples, + one_sigma_rule, + unpack_sf_dict, +) from darts.models.forecasting.forecasting_model import ( FutureCovariatesLocalForecastingModel, ) @@ -91,17 +95,15 @@ def _predict( verbose: bool = False, ): super()._predict(n, future_covariates, num_samples) - forecast_df = self.model.predict( + forecast_dict = self.model.predict( h=n, X=future_covariates.values(copy=False) if future_covariates else None, - level=(68.27,), # ask one std for the confidence interval. + level=(one_sigma_rule,), # ask one std for the confidence interval. ) - mu = forecast_df["mean"] + mu, std = unpack_sf_dict(forecast_dict) if num_samples > 1: - std = forecast_df["hi-68.27"] - mu - samples = np.random.normal(loc=mu, scale=std, size=(num_samples, n)).T - samples = np.expand_dims(samples, axis=1) + samples = create_normal_samples(mu, std, num_samples, n) else: samples = mu diff --git a/darts/models/forecasting/sf_auto_ces.py b/darts/models/forecasting/sf_auto_ces.py new file mode 100644 index 0000000000..6d18713595 --- /dev/null +++ b/darts/models/forecasting/sf_auto_ces.py @@ -0,0 +1,80 @@ +""" +StatsForecastAutoCES +----------- +""" + +from statsforecast.models import AutoCES as SFAutoCES + +from darts import TimeSeries +from darts.models.forecasting.forecasting_model import LocalForecastingModel + + +class StatsForecastAutoCES(LocalForecastingModel): + def __init__(self, *autoces_args, **autoces_kwargs): + """Auto-CES based on `Statsforecasts package + `_. + + Automatically selects the best Complex Exponential Smoothing model using an information criterion. + + + We refer to the `statsforecast AutoCES documentation + `_ + for the documentation of the arguments. + + Parameters + ---------- + autoces_args + Positional arguments for ``statsforecasts.models.AutoCES``. + autoces_kwargs + Keyword arguments for ``statsforecasts.models.AutoCES``. + + .. + + Examples + -------- + >>> from darts.models import StatsForecastAutoCES + >>> from darts.datasets import AirPassengersDataset + >>> series = AirPassengersDataset().load() + >>> model = StatsForecastAutoCES(season_length=12) + >>> model.fit(series[:-36]) + >>> pred = model.predict(36, num_samples=100) + """ + super().__init__() + self.model = SFAutoCES(*autoces_args, **autoces_kwargs) + + def __str__(self): + return "Auto-CES-Statsforecasts" + + def fit(self, series: TimeSeries): + super().fit(series) + self._assert_univariate(series) + series = self.training_series + self.model.fit( + series.values(copy=False).flatten(), + ) + return self + + def predict( + self, + n: int, + num_samples: int = 1, + verbose: bool = False, + ): + super().predict(n, num_samples) + forecast_dict = self.model.predict( + h=n, + ) + + mu = forecast_dict["mean"] + + return self._build_forecast_series(mu) + + @property + def min_train_series_length(self) -> int: + return 10 + + def _supports_range_index(self) -> bool: + return True + + def _is_probabilistic(self) -> bool: + return False diff --git a/darts/models/forecasting/sf_ets.py b/darts/models/forecasting/sf_auto_ets.py similarity index 60% rename from darts/models/forecasting/sf_ets.py rename to darts/models/forecasting/sf_auto_ets.py index 2f5f622d90..db12fd320d 100644 --- a/darts/models/forecasting/sf_ets.py +++ b/darts/models/forecasting/sf_auto_ets.py @@ -1,19 +1,25 @@ """ -StatsForecastETS +StatsForecastAutoETS ----------- """ from typing import Optional -from statsforecast.models import ETS +from statsforecast.models import AutoETS as SFAutoETS from darts import TimeSeries +from darts.models import LinearRegressionModel +from darts.models.components.statsforecast_utils import ( + create_normal_samples, + one_sigma_rule, + unpack_sf_dict, +) from darts.models.forecasting.forecasting_model import ( FutureCovariatesLocalForecastingModel, ) -class StatsForecastETS(FutureCovariatesLocalForecastingModel): +class StatsForecastAutoETS(FutureCovariatesLocalForecastingModel): def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs): """ETS based on `Statsforecasts package `_. @@ -25,6 +31,12 @@ def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs) This model accepts the same arguments as the `statsforecast ETS `_. package. + In addition to the StatsForecast implementation, this model can handle future covariates. It does so by first + regressing the series against the future covariates using the :class:'LinearRegressionModel' model and then + running StatsForecast's AutoETS on the in-sample residuals from this original regression. This approach was + inspired by 'this post of Stephan Kolassa< https://stats.stackexchange.com/q/220885>'_. + + Parameters ---------- season_length @@ -64,14 +76,15 @@ def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs) Examples -------- >>> from darts.datasets import AirPassengersDataset - >>> from darts.models import StatsForecastETS + >>> from darts.models import StatsForecastAutoETS >>> series = AirPassengersDataset().load() - >>> model = StatsForecastETS(season_length=12, model="AZZ") + >>> model = StatsForecastAutoETS(season_length=12, model="AZZ") >>> model.fit(series[:-36]) >>> pred = model.predict(36) """ super().__init__(add_encoders=add_encoders) - self.model = ETS(*ets_args, **ets_kwargs) + self.model = SFAutoETS(*ets_args, **ets_kwargs) + self._linreg = None def __str__(self): return "ETS-Statsforecasts" @@ -80,9 +93,25 @@ def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = Non super()._fit(series, future_covariates) self._assert_univariate(series) series = self.training_series + + if future_covariates is not None: + # perform OLS and get in-sample residuals + linreg = LinearRegressionModel(lags_future_covariates=[0]) + linreg.fit(series, future_covariates=future_covariates) + fitted_values = linreg.model.predict( + X=future_covariates.slice_intersect(series).values(copy=False) + ) + fitted_values_ts = TimeSeries.from_times_and_values( + times=series.time_index, values=fitted_values + ) + resids = series - fitted_values_ts + self._linreg = linreg + target = resids + else: + target = series + self.model.fit( - series.values(copy=False).flatten(), - X=future_covariates.values(copy=False) if future_covariates else None, + target.values(copy=False).flatten(), ) return self @@ -94,12 +123,27 @@ def _predict( verbose: bool = False, ): super()._predict(n, future_covariates, num_samples) - forecast_df = self.model.predict( + forecast_dict = self.model.predict( h=n, - X=future_covariates.values(copy=False) if future_covariates else None, + level=(one_sigma_rule,), # ask one std for the confidence interval ) - return self._build_forecast_series(forecast_df["mean"]) + mu_ets, std = unpack_sf_dict(forecast_dict) + + if future_covariates is not None: + mu_linreg = self._linreg.predict(n, future_covariates=future_covariates) + mu_linreg_values = mu_linreg.values(copy=False).reshape( + n, + ) + mu = mu_ets + mu_linreg_values + else: + mu = mu_ets + + if num_samples > 1: + samples = create_normal_samples(mu, std, num_samples, n) + else: + samples = mu + return self._build_forecast_series(samples) @property def min_train_series_length(self) -> int: @@ -109,4 +153,4 @@ def _supports_range_index(self) -> bool: return True def _is_probabilistic(self) -> bool: - return False + return True diff --git a/darts/models/forecasting/sf_auto_theta.py b/darts/models/forecasting/sf_auto_theta.py new file mode 100644 index 0000000000..3280a16722 --- /dev/null +++ b/darts/models/forecasting/sf_auto_theta.py @@ -0,0 +1,93 @@ +""" +StatsForecastAutoTheta +----------- +""" + +from statsforecast.models import AutoTheta as SFAutoTheta + +from darts import TimeSeries +from darts.models.components.statsforecast_utils import ( + create_normal_samples, + one_sigma_rule, + unpack_sf_dict, +) +from darts.models.forecasting.forecasting_model import LocalForecastingModel + + +class StatsForecastAutoTheta(LocalForecastingModel): + def __init__(self, *autotheta_args, **autotheta_kwargs): + """Auto-Theta based on `Statsforecasts package + `_. + + Automatically selects the best Theta (Standard Theta Model (‘STM’), Optimized Theta Model (‘OTM’), + Dynamic Standard Theta Model (‘DSTM’), Dynamic Optimized Theta Model (‘DOTM’)) model using mse. + + + It is probabilistic, whereas :class:`FourTheta` is not. + + We refer to the `statsforecast AutoTheta documentation + `_ + for the documentation of the arguments. + + Parameters + ---------- + autotheta_args + Positional arguments for ``statsforecasts.models.AutoTheta``. + autotheta_kwargs + Keyword arguments for ``statsforecasts.models.AutoTheta``. + + .. + + Examples + -------- + >>> from darts.models import StatsForecastAutoTheta + >>> from darts.datasets import AirPassengersDataset + >>> series = AirPassengersDataset().load() + >>> model = StatsForecastAutoTheta(season_length=12) + >>> model.fit(series[:-36]) + >>> pred = model.predict(36, num_samples=100) + """ + super().__init__() + self.model = SFAutoTheta(*autotheta_args, **autotheta_kwargs) + + def __str__(self): + return "Auto-Theta-Statsforecasts" + + def fit(self, series: TimeSeries): + super().fit(series) + self._assert_univariate(series) + series = self.training_series + self.model.fit( + series.values(copy=False).flatten(), + ) + return self + + def predict( + self, + n: int, + num_samples: int = 1, + verbose: bool = False, + ): + super().predict(n, num_samples) + forecast_dict = self.model.predict( + h=n, + level=(one_sigma_rule,), # ask one std for the confidence interval. + ) + + mu, std = unpack_sf_dict(forecast_dict) + if num_samples > 1: + samples = create_normal_samples(mu, std, num_samples, n) + else: + samples = mu + + return self._build_forecast_series(samples) + + @property + def min_train_series_length(self) -> int: + return 10 + + def _supports_range_index(self) -> bool: + return True + + def _is_probabilistic(self) -> bool: + return True diff --git a/darts/tests/models/forecasting/test_local_forecasting_models.py b/darts/tests/models/forecasting/test_local_forecasting_models.py index 63b5c9d82a..04fc2ce771 100644 --- a/darts/tests/models/forecasting/test_local_forecasting_models.py +++ b/darts/tests/models/forecasting/test_local_forecasting_models.py @@ -31,7 +31,9 @@ RandomForest, RegressionModel, StatsForecastAutoARIMA, - StatsForecastETS, + StatsForecastAutoCES, + StatsForecastAutoETS, + StatsForecastAutoTheta, Theta, ) from darts.models.forecasting.forecasting_model import ( @@ -51,7 +53,9 @@ (ARIMA(12, 2, 1), 5.2), (ARIMA(1, 1, 1), 24), (StatsForecastAutoARIMA(season_length=12), 4.6), - (StatsForecastETS(season_length=12, model="AAZ"), 4.1), + (StatsForecastAutoTheta(season_length=12), 5.5), + (StatsForecastAutoCES(season_length=12, model="Z"), 7.3), + (StatsForecastAutoETS(season_length=12, model="AAZ"), 4.1), (Croston(version="classic"), 23), (Croston(version="tsb", alpha_d=0.1, alpha_p=0.1), 23), (Theta(), 11), @@ -85,7 +89,7 @@ dual_models = [ ARIMA(), StatsForecastAutoARIMA(season_length=12), - StatsForecastETS(season_length=12), + StatsForecastAutoETS(season_length=12), Prophet(), AutoARIMA(), ] diff --git a/darts/tests/models/forecasting/test_sf_auto_ets.py b/darts/tests/models/forecasting/test_sf_auto_ets.py new file mode 100644 index 0000000000..00927b9977 --- /dev/null +++ b/darts/tests/models/forecasting/test_sf_auto_ets.py @@ -0,0 +1,63 @@ +import numpy as np +import pandas as pd + +from darts import TimeSeries +from darts.datasets import AirPassengersDataset +from darts.metrics import mae +from darts.models import LinearRegressionModel, StatsForecastAutoETS +from darts.tests.base_test_class import DartsBaseTestClass + + +class StatsForecastAutoETSTestCase(DartsBaseTestClass): + # real timeseries for functionality tests + ts_passengers = AirPassengersDataset().load() + ts_pass_train, ts_pass_val = ts_passengers.split_after(pd.Timestamp("19570101")) + + # as future covariates we want a trend + trend_values = np.arange(start=1, stop=len(ts_passengers) + 1) + trend_times = ts_passengers.time_index + ts_trend = TimeSeries.from_times_and_values( + times=trend_times, values=trend_values, columns=["trend"] + ) + ts_trend_train, ts_trend_val = ts_trend.split_after(pd.Timestamp("19570101")) + + def test_fit_on_residuals(self): + model = StatsForecastAutoETS(season_length=12, model="ZZZ") + + # test if we are indeed fitting the AutoETS on the residuals of the linear regression + model.fit(series=self.ts_pass_train, future_covariates=self.ts_trend_train) + + # create the residuals from the linear regression + fitted_values_linreg = model._linreg.model.predict( + X=self.ts_trend_train.values(copy=False) + ) + fitted_values_linreg_ts = TimeSeries.from_times_and_values( + times=self.ts_pass_train.time_index, values=fitted_values_linreg + ) + resids = self.ts_pass_train - fitted_values_linreg_ts + + # now make in-sample predictions with the AutoETS model + in_sample_preds = model.model.predict_in_sample()["fitted"] + ts_in_sample_preds = TimeSeries.from_times_and_values( + times=self.ts_pass_train.time_index, values=in_sample_preds + ) + + # compare in-sample predictions to the residuals they have supposedly been fitted on + current_mae = mae(resids, ts_in_sample_preds) + + self.assertTrue(current_mae < 9) + + def test_fit_a_linreg(self): + model = StatsForecastAutoETS(season_length=12, model="ZZZ") + model.fit(series=self.ts_pass_train, future_covariates=self.ts_trend_train) + + # check if linear regression was fit + self.assertIsNotNone(model._linreg) + self.assertTrue(model._linreg._fit_called) + + # fit a linear regression + linreg = LinearRegressionModel(lags_future_covariates=[0]) + linreg.fit(series=self.ts_pass_train, future_covariates=self.ts_trend_train) + + # check if the linear regression was fit on the same data by checking if the coefficients are equal + self.assertEqual(model._linreg.model.coef_, linreg.model.coef_) diff --git a/requirements/core.txt b/requirements/core.txt index ccc0ebe5f4..a1b6758911 100644 --- a/requirements/core.txt +++ b/requirements/core.txt @@ -13,7 +13,7 @@ requests>=2.22.0 scikit-learn>=1.0.1 scipy>=1.3.2 shap>=0.40.0 -statsforecast>=1.0.0 +statsforecast>=1.4.0 statsmodels>=0.13.0 tbats>=1.1.0 tqdm>=4.60.0