fix: remove unnecessary comments

etna-team · Aug 29, 2024 · bd23912
1 parent 4f6973d
commit bd23912
Show file tree

Hide file tree

Showing 2 changed files with 169 additions and 176 deletions.
diff --git a/etna/datasets/internal_datasets.py b/etna/datasets/internal_datasets.py
@@ -347,6 +347,7 @@ def read_data(path: Path, part: str) -> np.ndarray:
     targets = np.concatenate([targets_train, targets_test], axis=0)
     targets = targets[np.argsort(ts_indecies)].reshape(-1, 963)
 
+    # federal holidays and days with anomalies
     drop_days = [
         date(2008, 1, 1),
         date(2008, 1, 21),

diff --git a/tests/test_datasets/test_internal_datasets.py b/tests/test_datasets/test_internal_datasets.py
@@ -81,189 +81,181 @@ def test_not_present_part():
 @pytest.mark.parametrize(
     "dataset_name, expected_shape, expected_min_timestamp, expected_max_timestamp, dataset_parts",
     [
-        # pytest.param(
-        #     "electricity_15T",
-        #     (139896 + 360, 370),
-        #     pd.to_datetime("2011-01-01 00:15:00"),
-        #     pd.to_datetime("2015-01-01 00:00:00"),
-        #     ("train", "test"),
-        #     marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
-        # ),
-        # (
-        #     "m4_hourly",
-        #     (960 + 48, 414),
-        #     0,
-        #     1007,
-        #     ("train", "test"),
-        # ),
-        # pytest.param(
-        #     "m4_daily",
-        #     (9919 + 14, 4227),
-        #     0,
-        #     9932,
-        #     ("train", "test"),
-        #     marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
-        # ),
-        # (
-        #     "m4_weekly",
-        #     (2597 + 13, 359),
-        #     0,
-        #     2609,
-        #     ("train", "test"),
-        # ),
-        # pytest.param(
-        #     "m4_monthly",
-        #     (2794 + 18, 48000),
-        #     0,
-        #     2811,
-        #     ("train", "test"),
-        #     marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
-        # ),
-        # (
-        #     "m4_quarterly",
-        #     (866 + 8, 24000),
-        #     0,
-        #     873,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "m4_yearly",
-        #     (835 + 6, 23000),
-        #     0,
-        #     840,
-        #     ("train", "test"),
-        # ),
-        # pytest.param(
-        #     "traffic_2008_10T",
-        #     (65376 + 144, 963),
-        #     pd.to_datetime("2008-01-01 00:00:00"),
-        #     pd.to_datetime("2009-03-30 23:50:00"),
-        #     ("train", "test"),
-        #     marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
-        # ),
+        pytest.param(
+            "electricity_15T",
+            (139896 + 360, 370),
+            pd.to_datetime("2011-01-01 00:15:00"),
+            pd.to_datetime("2015-01-01 00:00:00"),
+            ("train", "test"),
+            marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
+        ),
+        (
+            "m4_hourly",
+            (960 + 48, 414),
+            0,
+            1007,
+            ("train", "test"),
+        ),
+        pytest.param(
+            "m4_daily",
+            (9919 + 14, 4227),
+            0,
+            9932,
+            ("train", "test"),
+            marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
+        ),
+        (
+            "m4_weekly",
+            (2597 + 13, 359),
+            0,
+            2609,
+            ("train", "test"),
+        ),
+        pytest.param(
+            "m4_monthly",
+            (2794 + 18, 48000),
+            0,
+            2811,
+            ("train", "test"),
+            marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
+        ),
+        (
+            "m4_quarterly",
+            (866 + 8, 24000),
+            0,
+            873,
+            ("train", "test"),
+        ),
         (
+            "m4_yearly",
+            (835 + 6, 23000),
+            0,
+            840,
+            ("train", "test"),
+        ),
+        pytest.param(
+            "traffic_2008_10T",
+            (65376 + 144, 963),
+            pd.to_datetime("2008-01-01 00:00:00"),
+            pd.to_datetime("2009-03-30 23:50:00"),
+            ("train", "test"),
+            marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
+        ),
+        pytest.param(
             "traffic_2008_hourly",
             (10896 + 24, 963),
             pd.to_datetime("2008-01-01 00:00:00"),
             pd.to_datetime("2009-03-30 23:00:00"),
             ("train", "test"),
+            marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
+        ),
+        pytest.param(
+            "traffic_2015_hourly",
+            (17520 + 24, 862),
+            pd.to_datetime("2015-01-01 00:00:00"),
+            pd.to_datetime("2016-12-31 23:00:00"),
+            ("train", "test"),
+            marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
+        ),
+        (
+            "m3_monthly",
+            (126 + 18, 2856),
+            0,
+            143,
+            ("train", "test"),
+        ),
+        (
+            "m3_quarterly",
+            (64 + 8, 1512),
+            0,
+            71,
+            ("train", "test"),
+        ),
+        (
+            "m3_other",
+            (96 + 8, 348),
+            0,
+            103,
+            ("train", "test"),
+        ),
+        (
+            "m3_yearly",
+            (41 + 6, 1290),
+            0,
+            46,
+            ("train", "test"),
+        ),
+        (
+            "tourism_monthly",
+            (309 + 24, 732),
+            0,
+            332,
+            ("train", "test"),
+        ),
+        (
+            "tourism_quarterly",
+            (122 + 8, 854),
+            0,
+            129,
+            ("train", "test"),
+        ),
+        (
+            "tourism_yearly",
+            (43 + 4, 1036),
+            0,
+            46,
+            ("train", "test"),
+        ),
+        (
+            "weather_10T",
+            (52560 + 144, 21),
+            pd.to_datetime("2020-01-01 00:10:00"),
+            pd.to_datetime("2021-01-01 00:00:00"),
+            ("train", "test"),
+        ),
+        (
+            "ETTm1",
+            (66800 + 2880, 7),
+            pd.to_datetime("2016-07-01 00:00:00"),
+            pd.to_datetime("2018-06-26 19:45:00"),
+            ("train", "test"),
+        ),
+        (
+            "ETTm2",
+            (66800 + 2880, 7),
+            pd.to_datetime("2016-07-01 00:00:00"),
+            pd.to_datetime("2018-06-26 19:45:00"),
+            ("train", "test"),
+        ),
+        (
+            "ETTh1",
+            (16700 + 720, 7),
+            pd.to_datetime("2016-07-01 00:00:00"),
+            pd.to_datetime("2018-06-26 19:00:00"),
+            ("train", "test"),
+        ),
+        (
+            "ETTh2",
+            (16700 + 720, 7),
+            pd.to_datetime("2016-07-01 00:00:00"),
+            pd.to_datetime("2018-06-26 19:00:00"),
+            ("train", "test"),
+        ),
+        pytest.param(
+            "IHEPC_T",
+            (2075259, 7),
+            pd.to_datetime("2006-12-16 17:24:00"),
+            pd.to_datetime("2010-11-26 21:02:00"),
+            tuple(),
+            marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
+        ),
+        (
+            "australian_wine_sales_monthly",
+            (176, 1),
+            pd.to_datetime("1980-01-01 00:00:00"),
+            pd.to_datetime("1994-08-01 00:00:00"),
+            tuple(),
         ),
-        # TODO: revert
-        # pytest.param(
-        #     "traffic_2008_hourly",
-        #     (10896 + 24, 963),
-        #     pd.to_datetime("2008-01-01 00:00:00"),
-        #     pd.to_datetime("2009-03-30 23:00:00"),
-        #     ("train", "test"),
-        #     marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
-        # ),
-        # pytest.param(
-        #     "traffic_2015_hourly",
-        #     (17520 + 24, 862),
-        #     pd.to_datetime("2015-01-01 00:00:00"),
-        #     pd.to_datetime("2016-12-31 23:00:00"),
-        #     ("train", "test"),
-        #     marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
-        # ),
-        # (
-        #     "m3_monthly",
-        #     (126 + 18, 2856),
-        #     0,
-        #     143,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "m3_quarterly",
-        #     (64 + 8, 1512),
-        #     0,
-        #     71,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "m3_other",
-        #     (96 + 8, 348),
-        #     0,
-        #     103,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "m3_yearly",
-        #     (41 + 6, 1290),
-        #     0,
-        #     46,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "tourism_monthly",
-        #     (309 + 24, 732),
-        #     0,
-        #     332,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "tourism_quarterly",
-        #     (122 + 8, 854),
-        #     0,
-        #     129,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "tourism_yearly",
-        #     (43 + 4, 1036),
-        #     0,
-        #     46,
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "weather_10T",
-        #     (52560 + 144, 21),
-        #     pd.to_datetime("2020-01-01 00:10:00"),
-        #     pd.to_datetime("2021-01-01 00:00:00"),
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "ETTm1",
-        #     (66800 + 2880, 7),
-        #     pd.to_datetime("2016-07-01 00:00:00"),
-        #     pd.to_datetime("2018-06-26 19:45:00"),
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "ETTm2",
-        #     (66800 + 2880, 7),
-        #     pd.to_datetime("2016-07-01 00:00:00"),
-        #     pd.to_datetime("2018-06-26 19:45:00"),
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "ETTh1",
-        #     (16700 + 720, 7),
-        #     pd.to_datetime("2016-07-01 00:00:00"),
-        #     pd.to_datetime("2018-06-26 19:00:00"),
-        #     ("train", "test"),
-        # ),
-        # (
-        #     "ETTh2",
-        #     (16700 + 720, 7),
-        #     pd.to_datetime("2016-07-01 00:00:00"),
-        #     pd.to_datetime("2018-06-26 19:00:00"),
-        #     ("train", "test"),
-        # ),
-        # pytest.param(
-        #     "IHEPC_T",
-        #     (2075259, 7),
-        #     pd.to_datetime("2006-12-16 17:24:00"),
-        #     pd.to_datetime("2010-11-26 21:02:00"),
-        #     tuple(),
-        #     marks=pytest.mark.skip(reason="Dataset is too large for testing in GitHub."),
-        # ),
-        # (
-        #     "australian_wine_sales_monthly",
-        #     (176, 1),
-        #     pd.to_datetime("1980-01-01 00:00:00"),
-        #     pd.to_datetime("1994-08-01 00:00:00"),
-        #     tuple(),
-        # ),
     ],
 )
 def test_dataset_statistics(