Add support for python 3.10 & 3.11 (#8)

* test new python * unpin packages * fix tests and upgrades * fix imports * update ubuntu image * remove comments
sintel-dev · Sep 28, 2023 · 9297173 · 9297173
1 parent e3acfaf
commit 9297173
Show file tree

Hide file tree

Showing 9 changed files with 60 additions and 117 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -32,8 +32,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.6', '3.7', '3.8', '3.9']
-        os: [ubuntu-20.04]
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        os: [ubuntu-latest]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}
@@ -50,8 +50,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.6', '3.7', '3.8', '3.9']
-        os: [ubuntu-20.04, macos-latest]
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        os: [ubuntu-latest, macos-latest]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}
@@ -68,8 +68,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.6', '3.7', '3.8', '3.9']
-        os: [ubuntu-20.04, macos-latest, windows-latest]
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/mlstars/adapters/statsmodels.py b/mlstars/adapters/statsmodels.py
@@ -1,11 +1,11 @@
 import numpy as np
-from statsmodels.tsa import arima_model
+from statsmodels.tsa.arima import model
 
 
 class ARIMA(object):
-    """A Wrapper for the statsmodels.tsa.arima_model.ARIMA class."""
+    """A wrapper for the statsmodels.tsa.arima.model.ARIMA class."""
 
-    def __init__(self, p, d, q, steps):
+    def __init__(self, p, d, q, trend, steps):
         """Initialize the ARIMA object.
 
         Args:
@@ -15,12 +15,17 @@ def __init__(self, p, d, q, steps):
                 Integer denoting the degree of differencing.
             q (int):
                 Integer denoting the order of the moving-average model.
+            trend (str):
+                Parameter controlling the deterministic trend. Can be specified
+                as a string where 'c' indicates a constant term, 't' indicates
+                a linear trend in time, and 'ct' includes both.
             steps (int):
                 Integer denoting the number of time steps to predict ahead.
         """
         self.p = p
         self.d = d
         self.q = q
+        self.trend = trend
         self.steps = steps
 
     def predict(self, X):
@@ -45,8 +50,8 @@ def predict(self, X):
 
         num_sequences = len(X)
         for sequence in range(num_sequences):
-            arima = arima_model.ARIMA(X[sequence], order=(self.p, self.d, self.q))
-            arima_fit = arima.fit(disp=0)
+            arima = model.ARIMA(X[sequence], order=(self.p, self.d, self.q), trend=self.trend)
+            arima_fit = arima.fit()
             arima_results.append(arima_fit.forecast(self.steps)[0])
 
         arima_results = np.asarray(arima_results)

diff --git a/mlstars/custom/timeseries_preprocessing.py b/mlstars/custom/timeseries_preprocessing.py
@@ -111,53 +111,6 @@ def rolling_window_sequences(X, index, window_size, target_size, step_size, targ
     return np.asarray(out_X), np.asarray(out_y), np.asarray(X_index), np.asarray(y_index)
 
 
-_TIME_SEGMENTS_AVERAGE_DEPRECATION_WARNING = (
-    "mlstars.custom.timeseries_preprocessing.time_segments_average "
-    "is deprecated and will be removed in a future version. Please use "
-    "mlstars.custom.timeseries_preprocessing.time_segments_aggregate instead."
-)
-
-
-def time_segments_average(X, interval, time_column):
-    """Compute average of values over given time span.
-
-    Args:
-        X (ndarray or pandas.DataFrame):
-            N-dimensional sequence of values.
-        interval (int):
-            Integer denoting time span to compute average of.
-        time_column (int):
-            Column of X that contains time values.
-
-    Returns:
-        ndarray, ndarray:
-            * Sequence of averaged values.
-            * Sequence of index values (first index of each averaged segment).
-    """
-    warnings.warn(_TIME_SEGMENTS_AVERAGE_DEPRECATION_WARNING, DeprecationWarning)
-
-    if isinstance(X, np.ndarray):
-        X = pd.DataFrame(X)
-
-    X = X.sort_values(time_column).set_index(time_column)
-
-    start_ts = X.index.values[0]
-    max_ts = X.index.values[-1]
-
-    values = list()
-    index = list()
-
-    while start_ts <= max_ts:
-        end_ts = start_ts + interval
-        subset = X.loc[start_ts:end_ts - 1]
-        means = subset.mean(skipna=True).values
-        values.append(means)
-        index.append(start_ts)
-        start_ts = end_ts
-
-    return np.asarray(values), np.asarray(index)
-
-
 def time_segments_aggregate(X, interval, time_column, method=['mean']):
     """Aggregate values over given time span.
 
@@ -269,4 +222,8 @@ def cutoff_window_sequences(X, timeseries, window_size, cutoff_time=None, time_i
 
         output.append(selected.values)
 
-    return np.array(output)
+    output = np.array(output, dtype=object)
+    if output.ndim >= 2:
+        output = output.astype(float)
+
+    return output
diff --git a/...es/statsmodels.tsa.arima_model.Arima.json b/...es/statsmodels.tsa.arima.model.ARIMA.json
@@ -1,7 +1,8 @@
 {
-    "name": "statsmodels.tsa.arima_model.Arima",
+    "name": "statsmodels.tsa.arima.model.ARIMA",
     "contributors": [
-        "Alexander Geiger <[email protected]>"
+        "Alexander Geiger <[email protected]>",
+        "Sarah Alnegheimish <[email protected]>"
     ],
     "description": "ARIMA Model",
     "classifiers": {
@@ -59,4 +60,4 @@
             }
         }
     }
-}
+}
diff --git a/setup.py b/setup.py
@@ -11,14 +11,14 @@
 
 install_requires = [
     'Keras>=2.4,<2.13',
-    'mlblocks>=0.6',
-    'numpy<1.21.0,>=1.16.0',
-    'pandas>=1,<2',
+    'mlblocks>=0.6.1',
+    'numpy>=1.17.4,<2',
+    'pandas>=1,<3',
     'scikit-learn>=0.21,<1.2',
     'scipy>=1.1.0,<2',
-    'statsmodels>=0.9.0,<0.13',
+    'statsmodels>=0.12.0,<0.15',
     'tensorflow>=2,<2.13',
-    'xgboost>=0.72.1,<1',
+    'xgboost>=0.72.1,<2',
 
     # fix google/protobuf/descriptor
     'protobuf<4',
@@ -46,10 +46,8 @@
     'nbsphinx>=0.5.0,<0.7',
     'Sphinx>=3,<3.3',
     'pydata-sphinx-theme<0.5',
-    'autodocsumm>=0.1.10,<1',
     'markupsafe<2.1.0',
-    'ipython>=6.5,<7.5',
-    'mistune>=0.7,<2',
+    'ipython>=6.5,<9',
     'Jinja2>=2,<3',
 
     # style check
@@ -68,6 +66,7 @@
     # Advanced testing
     'coverage>=4.5.1',
     'tox>=2.9.1',
+    'invoke',
 ]
 
 setup(
@@ -79,10 +78,11 @@
         'License :: OSI Approved :: MIT License',
         'Natural Language :: English',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
     ],
     description='Primitives and Pipelines for Time Series Data.',
     entry_points={
@@ -104,7 +104,7 @@
     keywords='mlstars',
     name='ml-stars',
     packages=find_packages(include=['mlstars', 'mlstars.*']),
-    python_requires='>=3.6,<3.10',
+    python_requires='>=3.7,<3.12',
     setup_requires=setup_requires,
     test_suite='tests',
     tests_require=tests_require,

diff --git a/tests/adapters/test_pandas.py b/tests/adapters/test_pandas.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pandas as pd
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from mlstars.adapters.pandas import resample
 

diff --git a/tests/adapters/test_statsmodels.py b/tests/adapters/test_statsmodels.py
@@ -1,25 +1,26 @@
 from unittest.mock import patch
 
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
 
 from mlstars.adapters.statsmodels import ARIMA
 
 
-@patch('statsmodels.tsa.arima_model.ARIMA')
+@patch('statsmodels.tsa.arima.model.ARIMA')
 def test_arima_1d(arima_mock):
-    arima = ARIMA(1, 0, 0, 3)
+    arima = ARIMA(1, 0, 0, 't', 3)
     X = np.array([1, 2, 3, 4, 5])
     arima.predict(X)
     assert_allclose(arima_mock.call_args[0][0], [1, 2, 3, 4, 5])
-    assert arima_mock.call_args[1] == {'order': (1, 0, 0)}
+    assert arima_mock.call_args[1] == {'order': (1, 0, 0), 'trend': 't'}
 
 
-@patch('statsmodels.tsa.arima_model.ARMAResults.forecast')
+@patch('statsmodels.tsa.arima.model.ARIMAResultsWrapper.forecast')
 def test_predict_1d(arima_mock):
     arima_mock.return_value = [[1, 2, 3]]
 
-    arima = ARIMA(1, 0, 0, 3)
+    arima = ARIMA(1, 0, 0, 't', 3)
 
     X = np.array([1, 2, 3, 4, 5])
     result = arima.predict(X)
@@ -29,9 +30,9 @@ def test_predict_1d(arima_mock):
     arima_mock.assert_called_once_with(3)
 
 
-@patch('statsmodels.tsa.arima_model.ARIMA')
+@patch('statsmodels.tsa.arima.model.ARIMA')
 def test_arima_2d(arima_mock):
-    arima = ARIMA(1, 0, 0, 3)
+    arima = ARIMA(1, 0, 0, 't', 3)
     X = np.array([
         [1, 2, 3, 4, 5],
         [6, 7, 8, 9, 10],
@@ -41,19 +42,19 @@ def test_arima_2d(arima_mock):
     assert_allclose(arima_mock.call_args_list[0][0], [[1, 2, 3, 4, 5]])
     assert_allclose(arima_mock.call_args_list[1][0], [[6, 7, 8, 9, 10]])
     assert_allclose(arima_mock.call_args_list[2][0], [[11, 12, 13, 14, 15]])
-    assert arima_mock.call_args_list[0][1] == {'order': (1, 0, 0)}
-    assert arima_mock.call_args_list[1][1] == {'order': (1, 0, 0)}
-    assert arima_mock.call_args_list[2][1] == {'order': (1, 0, 0)}
+    assert arima_mock.call_args_list[0][1] == {'order': (1, 0, 0), 'trend': 't'}
+    assert arima_mock.call_args_list[1][1] == {'order': (1, 0, 0), 'trend': 't'}
+    assert arima_mock.call_args_list[2][1] == {'order': (1, 0, 0), 'trend': 't'}
 
 
-@patch('statsmodels.tsa.arima_model.ARMAResults.forecast')
+@patch('statsmodels.tsa.arima.model.ARIMAResultsWrapper.forecast')
 def test_predict_2d(arima_mock):
     arima_mock.side_effect = [
         [[1, 2, 3]],
         [[4, 5, 6]],
         [[7, 8, 9]],
     ]
-    arima = ARIMA(1, 0, 0, 3)
+    arima = ARIMA(1, 0, 0, 't', 3)
 
     X = np.array([
         [1, 2, 3, 4, 5],
@@ -69,3 +70,11 @@ def test_predict_2d(arima_mock):
     ])
     assert_allclose(result, expected)
     arima_mock.assert_called_with(3)
+
+
+@patch('statsmodels.tsa.arima.model.ARIMA')
+def test_arima_3d(arima_mock):
+    arima = ARIMA(1, 0, 0, 'ct', 3)
+    X = np.ones(shape=(3, 2, 1))
+    with pytest.raises(ValueError):
+        arima.predict(X)
diff --git a/tests/custom/test_timeseries_preprocessing.py b/tests/custom/test_timeseries_preprocessing.py
@@ -5,8 +5,7 @@
 from numpy.testing import assert_allclose
 
 from mlstars.custom.timeseries_preprocessing import (
-    cutoff_window_sequences, intervals_to_mask, rolling_window_sequences, time_segments_aggregate,
-    time_segments_average,)
+    cutoff_window_sequences, intervals_to_mask, rolling_window_sequences, time_segments_aggregate,)
 
 
 class IntervalsToMaskTest(TestCase):
@@ -178,34 +177,6 @@ def test_drop_bool(self):
                   drop=drop, drop_windows=True)
 
 
-class TimeSegmentsAverageTest(TestCase):
-
-    def _run(self, X, interval, expected_values, expected_index, time_column):
-        values, index = time_segments_average(X, interval, time_column)
-
-        assert_allclose(values, expected_values)
-        assert_allclose(index, expected_index)
-
-    def test_array(self):
-        X = np.array([[1, 1], [2, 3], [3, 1], [4, 3]])
-        interval = 2
-        expected_values = np.array([[2], [2]])
-        expected_index = np.array([1, 3])
-        self._run(X, interval, expected_values, expected_index, time_column=0)
-
-    def test_pandas_dataframe(self):
-        X = pd.DataFrame([
-            [1, 1],
-            [2, 3],
-            [3, 1],
-            [4, 3]
-        ], columns=['timestamp', 'value'])
-        interval = 2
-        expected_values = np.array([[2], [2]])
-        expected_index = np.array([1, 3])
-        self._run(X, interval, expected_values, expected_index, time_column="timestamp")
-
-
 class TimeSegmentsAggregateTest(TestCase):
 
     def _run(self, X, interval, expected_values, expected_index, time_column, method=['mean']):
@@ -417,7 +388,7 @@ def test_not_enough_data(self):
                 [15, 35],
                 [16, 36]
             ])
-        ])
+        ], dtype=object)
 
         assert_allclose(
             array[0],

diff --git a/tox.ini b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = lint, docs, py3{6,7}-{readme,pytest,minimum,tutorials}
+envlist = lint, docs, py3{7, 8, 9, 10, 11}-{readme,pytest,minimum,tutorials}
 
 [testenv]
 skipsdist = false
@@ -20,4 +20,4 @@ commands =
     pytest: invoke pytest
     minimum: invoke  minimum
     tutorials: invoke tutorials
-    invoke rmdir --path {envdir}
+    invoke rmdir --path {envdir}