From e55bc8a48f697c240f9f510a89c7ef11acd7ee91 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Mon, 27 Feb 2023 21:23:54 +0100 Subject: [PATCH 01/16] MIDASTransformer now outputs a low sample variant of the high sample series through 'ts_transform()' --- darts/dataprocessing/transformers/midas.py | 107 +++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 darts/dataprocessing/transformers/midas.py diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py new file mode 100644 index 0000000000..e060a727a0 --- /dev/null +++ b/darts/dataprocessing/transformers/midas.py @@ -0,0 +1,107 @@ +""" +Mixed-data sampling (MIDAS) Transformer +------------------ +""" +from typing import Union + +import numpy as np +import pandas as pd +from pandas import DateOffset, Timedelta + +from darts import TimeSeries +from darts.dataprocessing.transformers import BaseDataTransformer +from darts.logging import get_logger, raise_log + +logger = get_logger(__name__) + + +def _assert_high_to_low_freq(high_freq_series_df: pd.DataFrame, + low_freq_series_df: pd.DataFrame, + rule, + high_freq, + ): + if not low_freq_series_df.shape[0] < high_freq_series_df.shape[0]: + raise_log( + ValueError(f"The target conversion should go from a high to a " \ + f"low frequency, instead the targeted frequency is" \ + f"{rule}, while the original frequency is {high_freq}.") + ) + + +class MIDASTransformer(BaseDataTransformer): + def __init__( + self, + rule: str, + strip: bool = True, + name: str = "MIDASTransformer", + n_jobs: int = 1, + verbose: bool = False, + ): + """ + A transformer that converts higher frequency time series to lower frequency using mixed-data sampling. + """ + super().__init__(name, n_jobs, verbose) + self.rule = rule + self.strip = strip + + @staticmethod + def ts_transform(series: TimeSeries, + rule: Union[DateOffset, Timedelta, str], + strip: bool, + ) -> TimeSeries: + high_freq = series.freq_str + series_df = series.pd_dataframe() + series_copy_df = series_df.copy() + series_df.index = series_df.index.to_period() + high_freq_period = series_df.index.freqstr + + # first ensure the length of the series is an exact multiple of the length of the targeted low frequency series + # we do this by resampling from high to low and then back to high again + low_freq_series_df = series_df.resample(rule).last() + low_index = low_freq_series_df.index.to_timestamp() + high_freq_series_df = low_freq_series_df.resample(high_freq_period).bfill().ffill() + high_index = high_freq_series_df.index.to_timestamp() + + _assert_high_to_low_freq(high_freq_series_df=high_freq_series_df, + low_freq_series_df=low_freq_series_df, + rule=rule, + high_freq=high_freq, + ) + + # if necessary, expand the original series + if len(high_index) > series_df.shape[0]: + series_df = pd.DataFrame(np.nan, index=high_index, columns=series_df.columns) + series_df.loc[series_copy_df.index, :] = series_copy_df.values + else: + series_df = series_copy_df + + + n_high = series_df.shape[0] + n_low = low_freq_series_df.shape[0] + factor = int(n_high / n_low) + + range_lst = list(range(n_high)) + col_names = list(series_df.columns) + midas_lst = [] + for f in range(factor): + range_lst_tmp = range_lst[f:][0::factor] + series_tmp_df = series_df.iloc[range_lst_tmp, :] + series_tmp_df.index = low_index + col_names_tmp = [col_name + f'_{f}' for col_name in col_names] + rename_dict_tmp = dict(zip(col_names, col_names_tmp)) + midas_lst += [series_tmp_df.rename(columns=rename_dict_tmp)] + + midas_df = pd.concat(midas_lst, axis=1) + + midas_ts = TimeSeries.from_dataframe(midas_df) + + if strip: + midas_ts = midas_ts.strip() + + return midas_ts + + +# from darts.datasets import AirPassengersDataset + +# series = AirPassengersDataset().load() + From 1622a34c505c59c4a776a175c849679d6f3004ff Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Mon, 27 Feb 2023 21:26:25 +0100 Subject: [PATCH 02/16] MIDASTransformer now outputs a low sample variant of the high sample series through 'ts_transform()' --- darts/dataprocessing/transformers/midas.py | 63 ++++++++++++---------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index e060a727a0..2f9b350434 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -15,27 +15,30 @@ logger = get_logger(__name__) -def _assert_high_to_low_freq(high_freq_series_df: pd.DataFrame, - low_freq_series_df: pd.DataFrame, - rule, - high_freq, - ): +def _assert_high_to_low_freq( + high_freq_series_df: pd.DataFrame, + low_freq_series_df: pd.DataFrame, + rule, + high_freq, +): if not low_freq_series_df.shape[0] < high_freq_series_df.shape[0]: raise_log( - ValueError(f"The target conversion should go from a high to a " \ - f"low frequency, instead the targeted frequency is" \ - f"{rule}, while the original frequency is {high_freq}.") + ValueError( + f"The target conversion should go from a high to a " + f"low frequency, instead the targeted frequency is" + f"{rule}, while the original frequency is {high_freq}." + ) ) class MIDASTransformer(BaseDataTransformer): def __init__( - self, - rule: str, - strip: bool = True, - name: str = "MIDASTransformer", - n_jobs: int = 1, - verbose: bool = False, + self, + rule: str, + strip: bool = True, + name: str = "MIDASTransformer", + n_jobs: int = 1, + verbose: bool = False, ): """ A transformer that converts higher frequency time series to lower frequency using mixed-data sampling. @@ -45,10 +48,11 @@ def __init__( self.strip = strip @staticmethod - def ts_transform(series: TimeSeries, - rule: Union[DateOffset, Timedelta, str], - strip: bool, - ) -> TimeSeries: + def ts_transform( + series: TimeSeries, + rule: Union[DateOffset, Timedelta, str], + strip: bool, + ) -> TimeSeries: high_freq = series.freq_str series_df = series.pd_dataframe() series_copy_df = series_df.copy() @@ -59,23 +63,27 @@ def ts_transform(series: TimeSeries, # we do this by resampling from high to low and then back to high again low_freq_series_df = series_df.resample(rule).last() low_index = low_freq_series_df.index.to_timestamp() - high_freq_series_df = low_freq_series_df.resample(high_freq_period).bfill().ffill() + high_freq_series_df = ( + low_freq_series_df.resample(high_freq_period).bfill().ffill() + ) high_index = high_freq_series_df.index.to_timestamp() - _assert_high_to_low_freq(high_freq_series_df=high_freq_series_df, - low_freq_series_df=low_freq_series_df, - rule=rule, - high_freq=high_freq, - ) + _assert_high_to_low_freq( + high_freq_series_df=high_freq_series_df, + low_freq_series_df=low_freq_series_df, + rule=rule, + high_freq=high_freq, + ) # if necessary, expand the original series if len(high_index) > series_df.shape[0]: - series_df = pd.DataFrame(np.nan, index=high_index, columns=series_df.columns) + series_df = pd.DataFrame( + np.nan, index=high_index, columns=series_df.columns + ) series_df.loc[series_copy_df.index, :] = series_copy_df.values else: series_df = series_copy_df - n_high = series_df.shape[0] n_low = low_freq_series_df.shape[0] factor = int(n_high / n_low) @@ -87,7 +95,7 @@ def ts_transform(series: TimeSeries, range_lst_tmp = range_lst[f:][0::factor] series_tmp_df = series_df.iloc[range_lst_tmp, :] series_tmp_df.index = low_index - col_names_tmp = [col_name + f'_{f}' for col_name in col_names] + col_names_tmp = [col_name + f"_{f}" for col_name in col_names] rename_dict_tmp = dict(zip(col_names, col_names_tmp)) midas_lst += [series_tmp_df.rename(columns=rename_dict_tmp)] @@ -104,4 +112,3 @@ def ts_transform(series: TimeSeries, # from darts.datasets import AirPassengersDataset # series = AirPassengersDataset().load() - From 268bdc2ea00b9404e0ac34cf0548df2112867f11 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Mon, 27 Feb 2023 21:46:59 +0100 Subject: [PATCH 03/16] extracted '_create_midas_df' from 'ts_transform' --- darts/dataprocessing/transformers/midas.py | 57 ++++++++++++++-------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 2f9b350434..c23a30f563 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from pandas import DateOffset, Timedelta +from pandas import DateOffset, DatetimeIndex, Timedelta from darts import TimeSeries from darts.dataprocessing.transformers import BaseDataTransformer @@ -59,8 +59,8 @@ def ts_transform( series_df.index = series_df.index.to_period() high_freq_period = series_df.index.freqstr - # first ensure the length of the series is an exact multiple of the length of the targeted low frequency series - # we do this by resampling from high to low and then back to high again + # ensure the length of the series is an exact multiple of the length of the targeted low frequency series + # we do this by resampling from a high freq to a low freq and then back to high again (possibly adding NaNs) low_freq_series_df = series_df.resample(rule).last() low_index = low_freq_series_df.index.to_timestamp() high_freq_series_df = ( @@ -84,31 +84,48 @@ def ts_transform( else: series_df = series_copy_df - n_high = series_df.shape[0] - n_low = low_freq_series_df.shape[0] - factor = int(n_high / n_low) - - range_lst = list(range(n_high)) - col_names = list(series_df.columns) - midas_lst = [] - for f in range(factor): - range_lst_tmp = range_lst[f:][0::factor] - series_tmp_df = series_df.iloc[range_lst_tmp, :] - series_tmp_df.index = low_index - col_names_tmp = [col_name + f"_{f}" for col_name in col_names] - rename_dict_tmp = dict(zip(col_names, col_names_tmp)) - midas_lst += [series_tmp_df.rename(columns=rename_dict_tmp)] - - midas_df = pd.concat(midas_lst, axis=1) + midas_df = _create_midas_df( + series_df=series_df, + low_freq_series_df=low_freq_series_df, + low_index=low_index, + ) + # back to TimeSeries midas_ts = TimeSeries.from_dataframe(midas_df) - if strip: midas_ts = midas_ts.strip() return midas_ts +def _create_midas_df( + series_df: pd.DataFrame, + low_freq_series_df: int, + low_index_datetime: DatetimeIndex, +) -> pd.DataFrame: + # calculate the multiple + n_high = series_df.shape[0] + n_low = low_freq_series_df.shape[0] + multiple = int(n_high / n_low) + + # set up integer index + range_lst = list(range(n_high)) + col_names = list(series_df.columns) + midas_lst = [] + + # for every column we now create 'multiple' columns + # by going through a column and picking every one in 'multiple' values + for f in range(multiple): + range_lst_tmp = range_lst[f:][0::multiple] + series_tmp_df = series_df.iloc[range_lst_tmp, :] + series_tmp_df.index = low_index_datetime + col_names_tmp = [col_name + f"_{f}" for col_name in col_names] + rename_dict_tmp = dict(zip(col_names, col_names_tmp)) + midas_lst += [series_tmp_df.rename(columns=rename_dict_tmp)] + + return pd.concat(midas_lst, axis=1) + + # from darts.datasets import AirPassengersDataset # series = AirPassengersDataset().load() From dde5b202c5c09408c2a7c0b11df73924031d4ee9 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Mon, 27 Feb 2023 21:47:23 +0100 Subject: [PATCH 04/16] extracted '_create_midas_df' from 'ts_transform' --- darts/dataprocessing/transformers/midas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index c23a30f563..bc8a2143b0 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -87,7 +87,7 @@ def ts_transform( midas_df = _create_midas_df( series_df=series_df, low_freq_series_df=low_freq_series_df, - low_index=low_index, + low_index_datetime=low_index, ) # back to TimeSeries From 1f8843f481cfa248069d8521e10199c16c52c23d Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Mon, 27 Feb 2023 21:50:44 +0100 Subject: [PATCH 05/16] Added some comments to helper functions --- darts/dataprocessing/transformers/midas.py | 38 +++++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index bc8a2143b0..6fbfb926c6 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -15,22 +15,6 @@ logger = get_logger(__name__) -def _assert_high_to_low_freq( - high_freq_series_df: pd.DataFrame, - low_freq_series_df: pd.DataFrame, - rule, - high_freq, -): - if not low_freq_series_df.shape[0] < high_freq_series_df.shape[0]: - raise_log( - ValueError( - f"The target conversion should go from a high to a " - f"low frequency, instead the targeted frequency is" - f"{rule}, while the original frequency is {high_freq}." - ) - ) - - class MIDASTransformer(BaseDataTransformer): def __init__( self, @@ -98,11 +82,33 @@ def ts_transform( return midas_ts +def _assert_high_to_low_freq( + high_freq_series_df: pd.DataFrame, + low_freq_series_df: pd.DataFrame, + rule, + high_freq, +): + """ " + Asserts that the lower frequency series really has a lower frequency then the assumed higher frequency series. + """ + if not low_freq_series_df.shape[0] < high_freq_series_df.shape[0]: + raise_log( + ValueError( + f"The target conversion should go from a high to a " + f"low frequency, instead the targeted frequency is" + f"{rule}, while the original frequency is {high_freq}." + ) + ) + + def _create_midas_df( series_df: pd.DataFrame, low_freq_series_df: int, low_index_datetime: DatetimeIndex, ) -> pd.DataFrame: + """ + Function for actually creating the lower frequency dataframe out of a higher frequency dataframe. + """ # calculate the multiple n_high = series_df.shape[0] n_low = low_freq_series_df.shape[0] From 99fa5d210320e4c3696537a029b45dc5fe4c5d1e Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Mon, 27 Feb 2023 21:54:25 +0100 Subject: [PATCH 06/16] changed some variable names --- darts/dataprocessing/transformers/midas.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 6fbfb926c6..72df84b530 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -37,7 +37,7 @@ def ts_transform( rule: Union[DateOffset, Timedelta, str], strip: bool, ) -> TimeSeries: - high_freq = series.freq_str + high_freq_datetime = series.freq_str series_df = series.pd_dataframe() series_copy_df = series_df.copy() series_df.index = series_df.index.to_period() @@ -46,23 +46,23 @@ def ts_transform( # ensure the length of the series is an exact multiple of the length of the targeted low frequency series # we do this by resampling from a high freq to a low freq and then back to high again (possibly adding NaNs) low_freq_series_df = series_df.resample(rule).last() - low_index = low_freq_series_df.index.to_timestamp() + low_index_datetime = low_freq_series_df.index.to_timestamp() high_freq_series_df = ( low_freq_series_df.resample(high_freq_period).bfill().ffill() ) - high_index = high_freq_series_df.index.to_timestamp() + high_index_datetime = high_freq_series_df.index.to_timestamp() _assert_high_to_low_freq( high_freq_series_df=high_freq_series_df, low_freq_series_df=low_freq_series_df, rule=rule, - high_freq=high_freq, + high_freq=high_freq_datetime, ) # if necessary, expand the original series - if len(high_index) > series_df.shape[0]: + if len(high_index_datetime) > series_df.shape[0]: series_df = pd.DataFrame( - np.nan, index=high_index, columns=series_df.columns + np.nan, index=high_index_datetime, columns=series_df.columns ) series_df.loc[series_copy_df.index, :] = series_copy_df.values else: @@ -71,7 +71,7 @@ def ts_transform( midas_df = _create_midas_df( series_df=series_df, low_freq_series_df=low_freq_series_df, - low_index_datetime=low_index, + low_index_datetime=low_index_datetime, ) # back to TimeSeries From e4403e1426d7b37823d449f0035b9e6e34809260 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Mon, 27 Feb 2023 21:55:54 +0100 Subject: [PATCH 07/16] changed some variable names --- darts/dataprocessing/transformers/midas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 72df84b530..1cc5c802e8 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -103,7 +103,7 @@ def _assert_high_to_low_freq( def _create_midas_df( series_df: pd.DataFrame, - low_freq_series_df: int, + low_freq_series_df: pd.DataFrame, low_index_datetime: DatetimeIndex, ) -> pd.DataFrame: """ From a61c4d64fd4026ee1c384bbf2596091ead0d5680 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Wed, 8 Mar 2023 21:56:46 +0100 Subject: [PATCH 08/16] added warning if target frequency and input frequency don't match up like they should in case of a MIDAS transformation. --- darts/dataprocessing/transformers/midas.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 1cc5c802e8..c7f4221f80 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -88,7 +88,7 @@ def _assert_high_to_low_freq( rule, high_freq, ): - """ " + """ Asserts that the lower frequency series really has a lower frequency then the assumed higher frequency series. """ if not low_freq_series_df.shape[0] < high_freq_series_df.shape[0]: @@ -112,7 +112,17 @@ def _create_midas_df( # calculate the multiple n_high = series_df.shape[0] n_low = low_freq_series_df.shape[0] - multiple = int(n_high / n_low) + multiple = n_high / n_low + + if not multiple.is_integer(): + raise_log( + ValueError( + "The frequency of the high frequency input series should be an exact multiple of the targeted" + "low frequency output. For example, you could go from a monthly series to a quarterly series." + ) + ) + else: + multiple = int(multiple) # set up integer index range_lst = list(range(n_high)) From 24daea550273ac8ffc1ecbfe672103e2daaf725f Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Wed, 8 Mar 2023 22:05:20 +0100 Subject: [PATCH 09/16] add _transform_iterator like the one in window_transformer --- darts/dataprocessing/transformers/midas.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index c7f4221f80..2c7790e297 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -2,7 +2,7 @@ Mixed-data sampling (MIDAS) Transformer ------------------ """ -from typing import Union +from typing import Iterator, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -11,6 +11,7 @@ from darts import TimeSeries from darts.dataprocessing.transformers import BaseDataTransformer from darts.logging import get_logger, raise_log +from darts.utils.utils import series2seq logger = get_logger(__name__) @@ -31,6 +32,15 @@ def __init__( self.rule = rule self.strip = strip + def _transform_iterator( + self, series: Sequence[TimeSeries] + ) -> Iterator[Tuple[TimeSeries]]: + + series = series2seq(series) + + for s in series: + yield s, self.rule, self.strip + @staticmethod def ts_transform( series: TimeSeries, From a7b86a221003fb86fa0c66211004bcb490c04d56 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Fri, 10 Mar 2023 10:54:31 +0100 Subject: [PATCH 10/16] docstring finished, including example --- darts/dataprocessing/transformers/__init__.py | 1 + darts/dataprocessing/transformers/midas.py | 57 +++++++++++++++++-- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/darts/dataprocessing/transformers/__init__.py b/darts/dataprocessing/transformers/__init__.py index e8efdfc919..5080760af3 100644 --- a/darts/dataprocessing/transformers/__init__.py +++ b/darts/dataprocessing/transformers/__init__.py @@ -9,6 +9,7 @@ from .fittable_data_transformer import FittableDataTransformer from .invertible_data_transformer import InvertibleDataTransformer from .mappers import InvertibleMapper, Mapper +from .midas import MIDAS from .missing_values_filler import MissingValuesFiller from .reconciliation import ( BottomUpReconciliator, diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 2c7790e297..ce74f00ecf 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -16,7 +16,7 @@ logger = get_logger(__name__) -class MIDASTransformer(BaseDataTransformer): +class MIDAS(BaseDataTransformer): def __init__( self, rule: str, @@ -25,8 +25,57 @@ def __init__( n_jobs: int = 1, verbose: bool = False, ): - """ - A transformer that converts higher frequency time series to lower frequency using mixed-data sampling. + """Mixed-data sampling transformer. + + A transformer that converts higher frequency time series to lower frequency using mixed-data sampling; see + [1]_ for further details. This allows higher frequency covariates to be used whilst forecasting a lower + frequency target series. For example, using monthly inputs to forecast a quarterly target. + + Notes + ----- + The high input frequency should always relate in the same rate to the low target frequency. For + example, there's always three months in quarter. However, the number of days in a month varies per month. So in + the latter case a MIDAS transformation does not work and the transformer will raise an error. + + Parameters + ---------- + rule + The offset string or object representing target conversion. Passed on to the rule parameter in + pandas.DataFrame.resample and therefore it is equivalent to it. + strip + Whether to strip -remove the NaNs from the start and the end of- the transformed series. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.dataprocessing.transformers import MIDAS + >>> monthly_series = AirPassengersDataset().load() + >>> midas = MIDAS(rule="Q") + >>> quarterly_series = midas.transform(monthly_series) + >>> print(quarterly_series.head()) + + array([[[112.], + [118.], + [132.]], + [[129.], + [121.], + [135.]], + [[148.], + [148.], + [136.]], + [[119.], + [104.], + [118.]], + [[115.], + [126.], + [141.]]]) + Coordinates: + * Month (Month) datetime64[ns] 1949-01-01 1949-04-01 ... 1950-01-01 + * component (component) object '#Passengers_0' ... '#Passengers_2' + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Mixed-data_sampling """ super().__init__(name, n_jobs, verbose) self.rule = rule @@ -45,7 +94,7 @@ def _transform_iterator( def ts_transform( series: TimeSeries, rule: Union[DateOffset, Timedelta, str], - strip: bool, + strip: bool = True, ) -> TimeSeries: high_freq_datetime = series.freq_str series_df = series.pd_dataframe() From 27a59999b77ec2ede8f3231084433e7992346ba9 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Fri, 10 Mar 2023 11:15:43 +0100 Subject: [PATCH 11/16] added comments and description to 'ts_transform()' --- darts/dataprocessing/transformers/midas.py | 25 ++++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index ce74f00ecf..9040dca35f 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -96,21 +96,36 @@ def ts_transform( rule: Union[DateOffset, Timedelta, str], strip: bool = True, ) -> TimeSeries: + """ + Transforms series from high to low frequency using a mixed-data sampling approach. Uses and relies on + pandas.DataFrame.resample. + + Steps: + (1) Transform series to pd.DataFrame and it's index to a pd.PeriodIndex + (2) Downsample series and then upsample it again + (3) Replace input series by unsampled series if it's not 'full' + (4) Transform every column of the high frequency series into multiple columns for the low frequency series + (5) Transform the low frequency series back into a TimeSeries + """ + # TimeSeries to pd.DataFrame high_freq_datetime = series.freq_str series_df = series.pd_dataframe() series_copy_df = series_df.copy() + # DateTime to PeriodIndex series_df.index = series_df.index.to_period() high_freq_period = series_df.index.freqstr - # ensure the length of the series is an exact multiple of the length of the targeted low frequency series - # we do this by resampling from a high freq to a low freq and then back to high again (possibly adding NaNs) + # length of the series must be an exact multiple of the length of the targeted low frequency series + # downsample low_freq_series_df = series_df.resample(rule).last() low_index_datetime = low_freq_series_df.index.to_timestamp() + # upsample (possibly add nans) high_freq_series_df = ( low_freq_series_df.resample(high_freq_period).bfill().ffill() ) high_index_datetime = high_freq_series_df.index.to_timestamp() + # check if user requested a transform from a high to a low frequency _assert_high_to_low_freq( high_freq_series_df=high_freq_series_df, low_freq_series_df=low_freq_series_df, @@ -127,6 +142,7 @@ def ts_transform( else: series_df = series_copy_df + # make multiple low frequency columns out of the high frequency column(s) midas_df = _create_midas_df( series_df=series_df, low_freq_series_df=low_freq_series_df, @@ -199,8 +215,3 @@ def _create_midas_df( midas_lst += [series_tmp_df.rename(columns=rename_dict_tmp)] return pd.concat(midas_lst, axis=1) - - -# from darts.datasets import AirPassengersDataset - -# series = AirPassengersDataset().load() From 800b98bbdaba561fe3918ffb51f6f7705ca8f137 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Fri, 17 Mar 2023 10:34:44 +0100 Subject: [PATCH 12/16] more robust up and downsampling in order to get for example 3 months for every quarter instead of there being some missing months, also changed the docstring a bit such that it works with the debugger --- darts/dataprocessing/transformers/midas.py | 49 ++++++++++--------- .../dataprocessing/transformers/test_midas.py | 36 ++++++++++++++ 2 files changed, 63 insertions(+), 22 deletions(-) create mode 100644 darts/tests/dataprocessing/transformers/test_midas.py diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 9040dca35f..0227615bd0 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -50,28 +50,36 @@ def __init__( >>> from darts.datasets import AirPassengersDataset >>> from darts.dataprocessing.transformers import MIDAS >>> monthly_series = AirPassengersDataset().load() - >>> midas = MIDAS(rule="Q") + >>> midas = MIDAS(rule="QS") >>> quarterly_series = midas.transform(monthly_series) >>> print(quarterly_series.head()) array([[[112.], [118.], [132.]], - [[129.], + + [[129.], [121.], [135.]], - [[148.], + + [[148.], [148.], [136.]], - [[119.], + + [[119.], [104.], [118.]], - [[115.], + + [[115.], [126.], [141.]]]) Coordinates: - * Month (Month) datetime64[ns] 1949-01-01 1949-04-01 ... 1950-01-01 - * component (component) object '#Passengers_0' ... '#Passengers_2' + * Month (Month) datetime64[ns] 1949-01-01 1949-04-01 ... 1950-01-01 + * component (component) object '#Passengers_0' ... '#Passengers_2' + Dimensions without coordinates: sample + Attributes: + static_covariates: None + hierarchy: None References ---------- @@ -101,25 +109,26 @@ def ts_transform( pandas.DataFrame.resample. Steps: - (1) Transform series to pd.DataFrame and it's index to a pd.PeriodIndex + (1) Transform series to pd.DataFrame and get frequency string for PeriodIndex (2) Downsample series and then upsample it again (3) Replace input series by unsampled series if it's not 'full' (4) Transform every column of the high frequency series into multiple columns for the low frequency series (5) Transform the low frequency series back into a TimeSeries """ - # TimeSeries to pd.DataFrame high_freq_datetime = series.freq_str + # TimeSeries to pd.DataFrame series_df = series.pd_dataframe() - series_copy_df = series_df.copy() - # DateTime to PeriodIndex - series_df.index = series_df.index.to_period() - high_freq_period = series_df.index.freqstr + # get high frequency string that's suitable for PeriodIndex + series_period_index_df = series_df.copy() + series_period_index_df.index = series_df.index.to_period() + high_freq_period = series_period_index_df.index.freqstr - # length of the series must be an exact multiple of the length of the targeted low frequency series # downsample low_freq_series_df = series_df.resample(rule).last() - low_index_datetime = low_freq_series_df.index.to_timestamp() - # upsample (possibly add nans) + low_index_datetime = low_freq_series_df.index + low_freq_series_df.index = low_index_datetime.to_period() + + # upsample to get full range of high freq periods for every low freq period high_freq_series_df = ( low_freq_series_df.resample(high_freq_period).bfill().ffill() ) @@ -138,14 +147,11 @@ def ts_transform( series_df = pd.DataFrame( np.nan, index=high_index_datetime, columns=series_df.columns ) - series_df.loc[series_copy_df.index, :] = series_copy_df.values - else: - series_df = series_copy_df + series_df.loc[series_df.index, :] = series_df.values # make multiple low frequency columns out of the high frequency column(s) midas_df = _create_midas_df( series_df=series_df, - low_freq_series_df=low_freq_series_df, low_index_datetime=low_index_datetime, ) @@ -178,7 +184,6 @@ def _assert_high_to_low_freq( def _create_midas_df( series_df: pd.DataFrame, - low_freq_series_df: pd.DataFrame, low_index_datetime: DatetimeIndex, ) -> pd.DataFrame: """ @@ -186,7 +191,7 @@ def _create_midas_df( """ # calculate the multiple n_high = series_df.shape[0] - n_low = low_freq_series_df.shape[0] + n_low = len(low_index_datetime) multiple = n_high / n_low if not multiple.is_integer(): diff --git a/darts/tests/dataprocessing/transformers/test_midas.py b/darts/tests/dataprocessing/transformers/test_midas.py new file mode 100644 index 0000000000..b446ec3dcb --- /dev/null +++ b/darts/tests/dataprocessing/transformers/test_midas.py @@ -0,0 +1,36 @@ +import unittest + +import numpy as np +import pandas as pd + +from darts import TimeSeries + + +class MIDASTestCase(unittest.TestCase): + monthly_start_values = np.arange(1, 10) + monthly_start_times = pd.date_range(start="01-2020", periods=9, freq="M") + monthly_start_ts = TimeSeries.from_times_and_values( + times=monthly_start_times, values=monthly_start_values, columns=["values"] + ) + + quarterly_end_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + quarterly_end_times = pd.date_range(start="01-2020", periods=3, freq="Q") + quarterly_end_ts = TimeSeries.from_times_and_values( + times=quarterly_end_times, + values=quarterly_end_values, + columns=["values_0", "values_1", "values_2"], + ) + # def assert_monthly_to_quarterly + + +monthly_values = np.arange(1, 10) +monthly_times = pd.date_range(start="01-2020", periods=9, freq="M") +monthly_ts = TimeSeries.from_times_and_values( + times=monthly_times, values=monthly_values, columns=["values"] +) + +values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) +times = pd.date_range(start="01-2020", periods=9, freq="M") +monthly_ts = TimeSeries.from_times_and_values( + times=times, values=values, columns=["values"] +) From e64cef23f226aae402c717d2f17492487b3d778a Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Fri, 17 Mar 2023 14:44:36 +0100 Subject: [PATCH 13/16] tests are coming along, but still giving some errors --- darts/dataprocessing/transformers/midas.py | 20 ++-- .../dataprocessing/transformers/test_midas.py | 94 ++++++++++++++++--- 2 files changed, 93 insertions(+), 21 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 0227615bd0..b38ac7c18b 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -116,8 +116,14 @@ def ts_transform( (5) Transform the low frequency series back into a TimeSeries """ high_freq_datetime = series.freq_str + if "End" in str(series.freq): + start_or_end = "end" + else: + start_or_end = "start" + # TimeSeries to pd.DataFrame series_df = series.pd_dataframe() + series_copy_df = series_df.copy() # get high frequency string that's suitable for PeriodIndex series_period_index_df = series_df.copy() series_period_index_df.index = series_df.index.to_period() @@ -126,13 +132,13 @@ def ts_transform( # downsample low_freq_series_df = series_df.resample(rule).last() low_index_datetime = low_freq_series_df.index - low_freq_series_df.index = low_index_datetime.to_period() # upsample to get full range of high freq periods for every low freq period - high_freq_series_df = ( - low_freq_series_df.resample(high_freq_period).bfill().ffill() + low_freq_series_df.index = low_index_datetime.to_period() + high_freq_series_df = low_freq_series_df.resample(high_freq_period).last() + high_index_datetime = high_freq_series_df.index.to_timestamp( + freq=high_freq_period, how=start_or_end ) - high_index_datetime = high_freq_series_df.index.to_timestamp() # check if user requested a transform from a high to a low frequency _assert_high_to_low_freq( @@ -145,9 +151,9 @@ def ts_transform( # if necessary, expand the original series if len(high_index_datetime) > series_df.shape[0]: series_df = pd.DataFrame( - np.nan, index=high_index_datetime, columns=series_df.columns + np.nan, index=high_index_datetime, columns=series_copy_df.columns ) - series_df.loc[series_df.index, :] = series_df.values + series_df.loc[series_copy_df.index, :] = series_copy_df.values # make multiple low frequency columns out of the high frequency column(s) midas_df = _create_midas_df( @@ -176,7 +182,7 @@ def _assert_high_to_low_freq( raise_log( ValueError( f"The target conversion should go from a high to a " - f"low frequency, instead the targeted frequency is" + f"low frequency, instead the targeted frequency is " f"{rule}, while the original frequency is {high_freq}." ) ) diff --git a/darts/tests/dataprocessing/transformers/test_midas.py b/darts/tests/dataprocessing/transformers/test_midas.py index b446ec3dcb..9565dc5fa0 100644 --- a/darts/tests/dataprocessing/transformers/test_midas.py +++ b/darts/tests/dataprocessing/transformers/test_midas.py @@ -4,23 +4,74 @@ import pandas as pd from darts import TimeSeries +from darts.dataprocessing.transformers import MIDAS class MIDASTestCase(unittest.TestCase): - monthly_start_values = np.arange(1, 10) - monthly_start_times = pd.date_range(start="01-2020", periods=9, freq="M") - monthly_start_ts = TimeSeries.from_times_and_values( - times=monthly_start_times, values=monthly_start_values, columns=["values"] + monthly_values = np.arange(1, 10) + monthly_times = pd.date_range(start="01-2020", periods=9, freq="M") + monthly_ts = TimeSeries.from_times_and_values( + times=monthly_times, values=monthly_values, columns=["values"] ) - quarterly_end_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - quarterly_end_times = pd.date_range(start="01-2020", periods=3, freq="Q") - quarterly_end_ts = TimeSeries.from_times_and_values( - times=quarterly_end_times, - values=quarterly_end_values, + monthly_not_complete_ts = monthly_ts[2:] + + quarterly_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") + quarterly_ts = TimeSeries.from_times_and_values( + times=quarterly_times, + values=quarterly_values, columns=["values_0", "values_1", "values_2"], ) - # def assert_monthly_to_quarterly + + quarterly_not_complete_values = np.array( + [[np.nan, np.nan, 3], [4, 5, 6], [7, 8, 9]] + ) + quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") + quarterly_not_complete_ts = TimeSeries.from_times_and_values( + times=quarterly_times, + values=quarterly_not_complete_values, + columns=["values_0", "values_1", "values_2"], + ) + + def test_complete_monthly_to_quarterly(self): + """ + Tests if monthly series is transformed into a quarterly series in the expected way. + """ + # 'complete' monthly series + midas = MIDAS(rule="QS") + quarterly_midas_ts = midas.transform(self.monthly_ts) + self.assertEqual( + quarterly_midas_ts, + self.quarterly_ts, + "Monthly TimeSeries is not correctly transformed " + "into a quarterly TimeSeries.", + ) + + def test_not_complete_monthly_to_quarterly(self): + """ + Tests if a not 'complete' monthly series is transformed into a quarterly series in the expected way. + """ + # not 'complete' monthly series + midas = MIDAS(rule="QS", strip=False) + quarterly_midas_not_complete_ts = midas.transform(self.monthly_not_complete_ts) + self.assertEqual( + quarterly_midas_not_complete_ts, + self.quarterly_not_complete_ts, + "Monthly TimeSeries is not " + "correctly transformed when" + " it is not 'complete'.", + ) + + # def assert_error_when_from_low_to_high(self): + # """ + # Tests if the transformer raises an error when the user asks for a transform in the wrong direction. + # """ + # wrong direction / low to high freq + # midas = MIDAS(rule="Q") + # self.assertRaises(ValueError): + # midas.transform(self.quarterly_ts) + # self.assertRaises(ValueError, midas.transform, self.monthly_ts) monthly_values = np.arange(1, 10) @@ -29,8 +80,23 @@ class MIDASTestCase(unittest.TestCase): times=monthly_times, values=monthly_values, columns=["values"] ) -values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) -times = pd.date_range(start="01-2020", periods=9, freq="M") -monthly_ts = TimeSeries.from_times_and_values( - times=times, values=values, columns=["values"] +monthly_not_complete_ts = monthly_ts[2:] + +quarterly_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) +quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") +quarterly_ts = TimeSeries.from_times_and_values( + times=quarterly_times, + values=quarterly_values, + columns=["values_0", "values_1", "values_2"], ) + +quarterly_not_complete_values = np.array([[np.nan, np.nan, 3], [4, 5, 6], [7, 8, 9]]) +quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") +quarterly_not_complete_ts = TimeSeries.from_times_and_values( + times=quarterly_times, + values=quarterly_not_complete_values, + columns=["values_0", "values_1", "values_2"], +) + +# midas = MIDAS(rule="M") +# midas.transform(quarterly_ts) From 98cf5a7dc88e26b05c8f100e948e1b38c8f5fdd3 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Fri, 17 Mar 2023 17:15:10 +0100 Subject: [PATCH 14/16] tests work --- darts/dataprocessing/transformers/midas.py | 12 +- .../dataprocessing/transformers/test_midas.py | 111 +++++++++++------- 2 files changed, 77 insertions(+), 46 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index b38ac7c18b..ee3eeb3be3 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -116,10 +116,6 @@ def ts_transform( (5) Transform the low frequency series back into a TimeSeries """ high_freq_datetime = series.freq_str - if "End" in str(series.freq): - start_or_end = "end" - else: - start_or_end = "start" # TimeSeries to pd.DataFrame series_df = series.pd_dataframe() @@ -136,8 +132,14 @@ def ts_transform( # upsample to get full range of high freq periods for every low freq period low_freq_series_df.index = low_index_datetime.to_period() high_freq_series_df = low_freq_series_df.resample(high_freq_period).last() + + # make sure the extension of the index matches the original index + if "End" in str(series.freq): + args_to_timestamp = {"freq": high_freq_period} + else: + args_to_timestamp = {"how": "start"} high_index_datetime = high_freq_series_df.index.to_timestamp( - freq=high_freq_period, how=start_or_end + **args_to_timestamp ) # check if user requested a transform from a high to a low frequency diff --git a/darts/tests/dataprocessing/transformers/test_midas.py b/darts/tests/dataprocessing/transformers/test_midas.py index 9565dc5fa0..e6d1d4f20d 100644 --- a/darts/tests/dataprocessing/transformers/test_midas.py +++ b/darts/tests/dataprocessing/transformers/test_midas.py @@ -14,7 +14,7 @@ class MIDASTestCase(unittest.TestCase): times=monthly_times, values=monthly_values, columns=["values"] ) - monthly_not_complete_ts = monthly_ts[2:] + monthly_not_complete_ts = monthly_ts[2:-1] quarterly_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") @@ -24,8 +24,16 @@ class MIDASTestCase(unittest.TestCase): columns=["values_0", "values_1", "values_2"], ) + quarterly_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + quarterly_end_times = pd.date_range(start="01-2020", periods=3, freq="Q") + quarterly_with_quarter_end_index_ts = TimeSeries.from_times_and_values( + times=quarterly_end_times, + values=quarterly_values, + columns=["values_0", "values_1", "values_2"], + ) + quarterly_not_complete_values = np.array( - [[np.nan, np.nan, 3], [4, 5, 6], [7, 8, 9]] + [[np.nan, np.nan, 3], [4, 5, 6], [7, 8, np.nan]] ) quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") quarterly_not_complete_ts = TimeSeries.from_times_and_values( @@ -34,13 +42,33 @@ class MIDASTestCase(unittest.TestCase): columns=["values_0", "values_1", "values_2"], ) + daily_times = pd.date_range(start="01-2020", end="09-30-2020", freq="D") + daily_values = np.arange(1, len(daily_times) + 1) + daily_ts = TimeSeries.from_times_and_values( + times=daily_times, values=daily_values, columns=["values"] + ) + + second_times = pd.date_range(start="01-2020", periods=120, freq="S") + second_values = np.arange(1, len(second_times) + 1) + second_ts = TimeSeries.from_times_and_values( + times=second_times, values=second_values, columns=["values"] + ) + + minute_times = pd.date_range(start="01-2020", periods=2, freq="T") + minute_values = np.array([[i for i in range(1, 61)], [i for i in range(61, 121)]]) + minute_ts = TimeSeries.from_times_and_values( + times=minute_times, + values=minute_values, + columns=[f"values_{i}" for i in range(60)], + ) + def test_complete_monthly_to_quarterly(self): """ Tests if monthly series is transformed into a quarterly series in the expected way. """ # 'complete' monthly series - midas = MIDAS(rule="QS") - quarterly_midas_ts = midas.transform(self.monthly_ts) + midas_1 = MIDAS(rule="QS") + quarterly_midas_ts = midas_1.transform(self.monthly_ts) self.assertEqual( quarterly_midas_ts, self.quarterly_ts, @@ -48,6 +76,16 @@ def test_complete_monthly_to_quarterly(self): "into a quarterly TimeSeries.", ) + # 'complete' monthly series + midas_2 = MIDAS(rule="Q") + quarterly_midas_ts = midas_2.transform(self.monthly_ts) + self.assertEqual( + quarterly_midas_ts, + self.quarterly_with_quarter_end_index_ts, + "Monthly TimeSeries is not correctly transformed " + "into a quarterly TimeSeries. Specifically, when the rule requires an QuarterEnd index.", + ) + def test_not_complete_monthly_to_quarterly(self): """ Tests if a not 'complete' monthly series is transformed into a quarterly series in the expected way. @@ -63,40 +101,31 @@ def test_not_complete_monthly_to_quarterly(self): " it is not 'complete'.", ) - # def assert_error_when_from_low_to_high(self): - # """ - # Tests if the transformer raises an error when the user asks for a transform in the wrong direction. - # """ - # wrong direction / low to high freq - # midas = MIDAS(rule="Q") - # self.assertRaises(ValueError): - # midas.transform(self.quarterly_ts) - # self.assertRaises(ValueError, midas.transform, self.monthly_ts) - - -monthly_values = np.arange(1, 10) -monthly_times = pd.date_range(start="01-2020", periods=9, freq="M") -monthly_ts = TimeSeries.from_times_and_values( - times=monthly_times, values=monthly_values, columns=["values"] -) - -monthly_not_complete_ts = monthly_ts[2:] - -quarterly_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) -quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") -quarterly_ts = TimeSeries.from_times_and_values( - times=quarterly_times, - values=quarterly_values, - columns=["values_0", "values_1", "values_2"], -) - -quarterly_not_complete_values = np.array([[np.nan, np.nan, 3], [4, 5, 6], [7, 8, 9]]) -quarterly_times = pd.date_range(start="01-2020", periods=3, freq="QS") -quarterly_not_complete_ts = TimeSeries.from_times_and_values( - times=quarterly_times, - values=quarterly_not_complete_values, - columns=["values_0", "values_1", "values_2"], -) - -# midas = MIDAS(rule="M") -# midas.transform(quarterly_ts) + def test_error_when_from_low_to_high(self): + """ + Tests if the transformer raises an error when the user asks for a transform in the wrong direction. + """ + # wrong direction / low to high freq + midas_1 = MIDAS(rule="M") + self.assertRaises(ValueError, midas_1.transform, self.quarterly_ts) + + # transform to same index requested + midas_2 = MIDAS(rule="Q") + self.assertRaises(ValueError, midas_2.transform, self.quarterly_ts) + + def test_error_when_frequency_not_suitable_for_midas(self): + """ + MIDAS can only be performed when the high frequency is the same and the exact multiple of the low frequency. + For example, there are always exactly three months in a quarter, but the number of days in a month differs. + So the monthly to quarterly transformation is possible, while the daily to monthly MIDAS transform is + impossible. + """ + midas = MIDAS(rule="M") + self.assertRaises(ValueError, midas.transform, self.daily_ts) + + def test_from_second_to_minute(self): + """ + Test to see if other frequencies transforms like second to minute work as well. + """ + midas = MIDAS(rule="T") + self.assertEqual(midas.transform(self.second_ts), self.minute_ts) From 998f4b6c0729141a06d88ab07255697dc2849bfb Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Fri, 17 Mar 2023 17:16:38 +0100 Subject: [PATCH 15/16] small comment change --- darts/dataprocessing/transformers/midas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index ee3eeb3be3..2659bed5cc 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -120,6 +120,7 @@ def ts_transform( # TimeSeries to pd.DataFrame series_df = series.pd_dataframe() series_copy_df = series_df.copy() + # get high frequency string that's suitable for PeriodIndex series_period_index_df = series_df.copy() series_period_index_df.index = series_df.index.to_period() @@ -129,7 +130,7 @@ def ts_transform( low_freq_series_df = series_df.resample(rule).last() low_index_datetime = low_freq_series_df.index - # upsample to get full range of high freq periods for every low freq period + # upsample again to get full range of high freq periods for every low freq period low_freq_series_df.index = low_index_datetime.to_period() high_freq_series_df = low_freq_series_df.resample(high_freq_period).last() @@ -142,7 +143,7 @@ def ts_transform( **args_to_timestamp ) - # check if user requested a transform from a high to a low frequency + # check if user requested a transform from a high to a low frequency (otherwise raise an error) _assert_high_to_low_freq( high_freq_series_df=high_freq_series_df, low_freq_series_df=low_freq_series_df, From d9918c9b1054fd4d03226e801562a5a9905bf5d2 Mon Sep 17 00:00:00 2001 From: Beerstabr Date: Fri, 31 Mar 2023 12:10:37 +0200 Subject: [PATCH 16/16] adapted to 'params['fixed']['variable_name']' way of dealing with args --- darts/dataprocessing/transformers/midas.py | 25 ++++++---------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 2659bed5cc..4e83220210 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -2,16 +2,15 @@ Mixed-data sampling (MIDAS) Transformer ------------------ """ -from typing import Iterator, Sequence, Tuple, Union +from typing import Any, Mapping import numpy as np import pandas as pd -from pandas import DateOffset, DatetimeIndex, Timedelta +from pandas import DatetimeIndex from darts import TimeSeries from darts.dataprocessing.transformers import BaseDataTransformer from darts.logging import get_logger, raise_log -from darts.utils.utils import series2seq logger = get_logger(__name__) @@ -85,25 +84,12 @@ def __init__( ---------- .. [1] https://en.wikipedia.org/wiki/Mixed-data_sampling """ + self._rule = rule + self._strip = strip super().__init__(name, n_jobs, verbose) - self.rule = rule - self.strip = strip - - def _transform_iterator( - self, series: Sequence[TimeSeries] - ) -> Iterator[Tuple[TimeSeries]]: - - series = series2seq(series) - - for s in series: - yield s, self.rule, self.strip @staticmethod - def ts_transform( - series: TimeSeries, - rule: Union[DateOffset, Timedelta, str], - strip: bool = True, - ) -> TimeSeries: + def ts_transform(series: TimeSeries, params: Mapping[str, Any]) -> TimeSeries: """ Transforms series from high to low frequency using a mixed-data sampling approach. Uses and relies on pandas.DataFrame.resample. @@ -115,6 +101,7 @@ def ts_transform( (4) Transform every column of the high frequency series into multiple columns for the low frequency series (5) Transform the low frequency series back into a TimeSeries """ + rule, strip = params["fixed"]["_rule"], params["fixed"]["_strip"] high_freq_datetime = series.freq_str # TimeSeries to pd.DataFrame