From 53082af10ac090e7f4cd971a99bdc2993fff5d5c Mon Sep 17 00:00:00 2001
From: Clara De Smet <clara.de.smet@alliander.com>
Date: Wed, 2 Oct 2024 15:14:41 +0200
Subject: [PATCH] fix: Renamed MLModelType to ModelType

Signed-off-by: Clara De Smet <clara.de.smet@alliander.com>
---
 openstef/enums.py                             |  3 +-
 openstef/model/model_creator.py               | 40 +++++++++----------
 openstef/model/objective.py                   | 14 +++----
 openstef/model/objective_creator.py           | 22 +++++-----
 openstef/tasks/calculate_kpi.py               |  6 +--
 openstef/tasks/create_components_forecast.py  |  4 +-
 openstef/tasks/create_forecast.py             |  4 +-
 openstef/tasks/optimize_hyperparameters.py    |  4 +-
 openstef/tasks/split_forecast.py              |  4 +-
 openstef/tasks/train_model.py                 |  4 +-
 test/unit/model/test_model_creator.py         | 16 ++++----
 .../pipeline/test_pipeline_train_model.py     | 14 +++----
 12 files changed, 67 insertions(+), 68 deletions(-)

diff --git a/openstef/enums.py b/openstef/enums.py
index cef096121..3c0dcab12 100644
--- a/openstef/enums.py
+++ b/openstef/enums.py
@@ -4,8 +4,7 @@
 from enum import Enum
 
 
-# TODO replace this with ModelType (MLModelType == Machine Learning model type)
-class MLModelType(Enum):
+class ModelType(Enum):
     XGB = "xgb"
     XGB_QUANTILE = "xgb_quantile"
     XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile"
diff --git a/openstef/model/model_creator.py b/openstef/model/model_creator.py
index 837c6052a..40515fb6f 100644
--- a/openstef/model/model_creator.py
+++ b/openstef/model/model_creator.py
@@ -6,7 +6,7 @@
 
 import structlog
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.model.regressors.arima import ARIMAOpenstfRegressor
 from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
 from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
@@ -29,7 +29,7 @@
 logger = structlog.get_logger(__name__)
 
 valid_model_kwargs = {
-    MLModelType.XGB: [
+    ModelType.XGB: [
         "n_estimators",
         "objective",
         "max_depth",
@@ -60,7 +60,7 @@
         "validate_parameters",
         "early_stopping_rounds",
     ],
-    MLModelType.LGB: [
+    ModelType.LGB: [
         "boosting_type",
         "objective",
         "num_leaves",
@@ -82,7 +82,7 @@
         "importance_type",
         "early_stopping_rounds",
     ],
-    MLModelType.XGB_QUANTILE: [
+    ModelType.XGB_QUANTILE: [
         "quantiles",
         "gamma",
         "colsample_bytree",
@@ -91,7 +91,7 @@
         "max_depth",
         "early_stopping_rounds",
     ],
-    MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
+    ModelType.XGB_MULTIOUTPUT_QUANTILE: [
         "quantiles",
         "gamma",
         "colsample_bytree",
@@ -101,15 +101,15 @@
         "early_stopping_rounds",
         "arctan_smoothing",
     ],
-    MLModelType.LINEAR: [
+    ModelType.LINEAR: [
         "missing_values",
         "imputation_strategy",
         "fill_value",
     ],
-    MLModelType.FLATLINER: [
+    ModelType.FLATLINER: [
         "quantiles",
     ],
-    MLModelType.LINEAR_QUANTILE: [
+    ModelType.LINEAR_QUANTILE: [
         "alpha",
         "quantiles",
         "solver",
@@ -117,7 +117,7 @@
         "imputation_strategy",
         "fill_value",
     ],
-    MLModelType.ARIMA: [
+    ModelType.ARIMA: [
         "backtest_max_horizon",
         "order",
         "seasonal_order",
@@ -131,18 +131,18 @@ class ModelCreator:
 
     # Set object mapping
     MODEL_CONSTRUCTORS = {
-        MLModelType.XGB: XGBOpenstfRegressor,
-        MLModelType.LGB: LGBMOpenstfRegressor,
-        MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
-        MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
-        MLModelType.LINEAR: LinearOpenstfRegressor,
-        MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
-        MLModelType.ARIMA: ARIMAOpenstfRegressor,
-        MLModelType.FLATLINER: FlatlinerRegressor,
+        ModelType.XGB: XGBOpenstfRegressor,
+        ModelType.LGB: LGBMOpenstfRegressor,
+        ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
+        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
+        ModelType.LINEAR: LinearOpenstfRegressor,
+        ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
+        ModelType.ARIMA: ARIMAOpenstfRegressor,
+        ModelType.FLATLINER: FlatlinerRegressor,
     }
 
     @staticmethod
-    def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegressor:
+    def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor:
         """Create a machine learning model based on model type.
 
         Args:
@@ -163,7 +163,7 @@ def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegres
                 model_class = load_custom_model(model_type)
                 valid_kwargs = model_class.valid_kwargs()
             else:
-                model_type = MLModelType(model_type)
+                model_type = ModelType(model_type)
                 model_class = ModelCreator.MODEL_CONSTRUCTORS[model_type]
                 valid_kwargs = valid_model_kwargs[model_type]
                 # Check if model as imported
@@ -174,7 +174,7 @@ def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegres
                         "Please refer to the ReadMe for instructions"
                     )
         except ValueError as e:
-            valid_types = [t.value for t in MLModelType]
+            valid_types = [t.value for t in ModelType]
             raise NotImplementedError(
                 f"No constructor for '{model_type}', "
                 f"valid model_types are: {valid_types} "
diff --git a/openstef/model/objective.py b/openstef/model/objective.py
index 08c52a702..bebbf894e 100644
--- a/openstef/model/objective.py
+++ b/openstef/model/objective.py
@@ -8,7 +8,7 @@
 import optuna
 import pandas as pd
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.metrics import metrics
 from openstef.metrics.reporter import Report, Reporter
 from openstef.model.regressors.regressor import OpenstfRegressor
@@ -245,7 +245,7 @@ def get_default_values(cls) -> dict:
 class XGBRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.XGB
+        self.model_type = ModelType.XGB
 
     # extend the parameters with the model specific ones per implementation
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
@@ -282,7 +282,7 @@ def get_default_values(cls) -> dict:
 class LGBRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.LGB
+        self.model_type = ModelType.LGB
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for LGB Regressor Objective with objective specific parameters.
@@ -323,7 +323,7 @@ def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
 class XGBQuantileRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.XGB_QUANTILE
+        self.model_type = ModelType.XGB_QUANTILE
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for XGBQuantile Regressor Objective with objective specific parameters.
@@ -352,7 +352,7 @@ def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
 class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.XGB_QUANTILE
+        self.model_type = ModelType.XGB_MULTIOUTPUT_QUANTILE
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
@@ -382,7 +382,7 @@ def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
 class LinearRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.LINEAR
+        self.model_type = ModelType.LINEAR
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for Linear Regressor Objective with objective specific parameters.
@@ -405,7 +405,7 @@ def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
 class ARIMARegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.ARIMA
+        self.model_type = ModelType.ARIMA
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for ARIMA Regressor Objective with objective specific parameters.
diff --git a/openstef/model/objective_creator.py b/openstef/model/objective_creator.py
index 3a05e7a8d..6c980f125 100644
--- a/openstef/model/objective_creator.py
+++ b/openstef/model/objective_creator.py
@@ -4,7 +4,7 @@
 
 from typing import Union
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.model.objective import (
     ARIMARegressorObjective,
     LGBRegressorObjective,
@@ -22,17 +22,17 @@
 
 class ObjectiveCreator:
     OBJECTIVES = {
-        MLModelType.XGB: XGBRegressorObjective,
-        MLModelType.LGB: LGBRegressorObjective,
-        MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
-        MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
-        MLModelType.LINEAR: LinearRegressorObjective,
-        MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
-        MLModelType.ARIMA: ARIMARegressorObjective,
+        ModelType.XGB: XGBRegressorObjective,
+        ModelType.LGB: LGBRegressorObjective,
+        ModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
+        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
+        ModelType.LINEAR: LinearRegressorObjective,
+        ModelType.LINEAR_QUANTILE: LinearRegressorObjective,
+        ModelType.ARIMA: ARIMARegressorObjective,
     }
 
     @staticmethod
-    def create_objective(model_type: Union[MLModelType, str]) -> RegressorObjective:
+    def create_objective(model_type: Union[ModelType, str]) -> RegressorObjective:
         """Create an objective function based on model type.
 
         Args:
@@ -51,10 +51,10 @@ def create_objective(model_type: Union[MLModelType, str]) -> RegressorObjective:
             if is_custom_type(model_type):
                 objective = create_custom_objective(model_type)
             else:
-                model_type = MLModelType(model_type)
+                model_type = ModelType(model_type)
                 objective = ObjectiveCreator.OBJECTIVES[model_type]
         except ValueError as e:
-            valid_types = [t.value for t in MLModelType]
+            valid_types = [t.value for t in ModelType]
             raise NotImplementedError(
                 f"No objective for '{model_type}', "
                 f"valid model_types are: {valid_types}"
diff --git a/openstef/tasks/calculate_kpi.py b/openstef/tasks/calculate_kpi.py
index 6d38210cb..ea2c10255 100644
--- a/openstef/tasks/calculate_kpi.py
+++ b/openstef/tasks/calculate_kpi.py
@@ -29,7 +29,7 @@
 import structlog
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.exceptions import NoPredictedLoadError, NoRealisedLoadError
 from openstef.metrics import metrics
 from openstef.settings import Settings
@@ -42,7 +42,7 @@
 THRESHOLD_OPTIMIZING = 0.50
 
 
-def main(model_type: MLModelType = None, config=None, database=None) -> None:
+def main(model_type: ModelType = None, config=None, database=None) -> None:
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -52,7 +52,7 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None:
         )
 
     if model_type is None:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
     with TaskContext(taskname, config, database) as context:
         # Set start and end time
diff --git a/openstef/tasks/create_components_forecast.py b/openstef/tasks/create_components_forecast.py
index 3e6e35cc2..ee2b43dc4 100644
--- a/openstef/tasks/create_components_forecast.py
+++ b/openstef/tasks/create_components_forecast.py
@@ -29,7 +29,7 @@
 import structlog
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.exceptions import ComponentForecastTooShortHorizonError
 from openstef.pipeline.create_component_forecast import (
     create_components_forecast_pipeline,
@@ -150,7 +150,7 @@ def main(config: object = None, database: object = None, **kwargs):
         )
 
     with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(
             context,
diff --git a/openstef/tasks/create_forecast.py b/openstef/tasks/create_forecast.py
index fff469346..da2f72623 100644
--- a/openstef/tasks/create_forecast.py
+++ b/openstef/tasks/create_forecast.py
@@ -24,7 +24,7 @@
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType, PipelineType
+from openstef.enums import ModelType, PipelineType
 from openstef.exceptions import InputDataOngoingZeroFlatlinerError
 from openstef.pipeline.create_forecast import create_forecast_pipeline
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
@@ -129,7 +129,7 @@ def main(model_type=None, config=None, database=None, **kwargs):
 
     with TaskContext(taskname, config, database) as context:
         if model_type is None:
-            model_type = [ml.value for ml in MLModelType]
+            model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(context, model_type=model_type).map(
             create_forecast_task, context, **kwargs
diff --git a/openstef/tasks/optimize_hyperparameters.py b/openstef/tasks/optimize_hyperparameters.py
index 6d3cfa6fb..304547204 100644
--- a/openstef/tasks/optimize_hyperparameters.py
+++ b/openstef/tasks/optimize_hyperparameters.py
@@ -20,7 +20,7 @@
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType, PipelineType
+from openstef.enums import ModelType, PipelineType
 from openstef.model.serializer import MLflowSerializer
 from openstef.monitoring import teams
 from openstef.pipeline.optimize_hyperparameters import optimize_hyperparameters_pipeline
@@ -124,7 +124,7 @@ def main(config=None, database=None):
         )
 
     with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(context, model_type=model_type).map(
             optimize_hyperparameters_task, context
diff --git a/openstef/tasks/split_forecast.py b/openstef/tasks/split_forecast.py
index 033313add..be1f83d0d 100644
--- a/openstef/tasks/split_forecast.py
+++ b/openstef/tasks/split_forecast.py
@@ -33,7 +33,7 @@
 
 import openstef.monitoring.teams as monitoring
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.settings import Settings
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
 from openstef.tasks.utils.taskcontext import TaskContext
@@ -51,7 +51,7 @@ def main(config=None, database=None):
         )
 
     with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(
             context,
diff --git a/openstef/tasks/train_model.py b/openstef/tasks/train_model.py
index a88d87cb3..5c6e21625 100644
--- a/openstef/tasks/train_model.py
+++ b/openstef/tasks/train_model.py
@@ -23,7 +23,7 @@
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType, PipelineType
+from openstef.enums import ModelType, PipelineType
 from openstef.exceptions import (
     InputDataOngoingZeroFlatlinerError,
     SkipSaveTrainingForecasts,
@@ -179,7 +179,7 @@ def main(model_type=None, config=None, database=None):
         )
 
     if model_type is None:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
     taskname = Path(__file__).name.replace(".py", "")
     datetime_now = datetime.utcnow()
diff --git a/test/unit/model/test_model_creator.py b/test/unit/model/test_model_creator.py
index 5465325a3..9ff180aac 100644
--- a/test/unit/model/test_model_creator.py
+++ b/test/unit/model/test_model_creator.py
@@ -5,7 +5,7 @@
 import sys
 from unittest import TestCase
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.model.model_creator import ModelCreator
 from openstef.model.regressors.regressor import OpenstfRegressor
 from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
@@ -14,22 +14,22 @@
 class TestModelCreator(TestCase):
     def test_create_model_happy_flow(self):
         # Test happy flow (both str and enum model_type arguments)
-        valid_types = [t.value for t in MLModelType] + [t for t in MLModelType]
+        valid_types = [t.value for t in ModelType] + [t for t in ModelType]
         for model_type in valid_types:
             model = ModelCreator.create_model(model_type)
             self.assertIsInstance(model, OpenstfRegressor)
             self.assertTrue(hasattr(model, "can_predict_quantiles"))
             if model_type in [
                 "xgb_quantile",
-                MLModelType("xgb_quantile"),
+                ModelType("xgb_quantile"),
                 "arima",
-                MLModelType("arima"),
+                ModelType("arima"),
                 "linear_quantile",
-                MLModelType("linear_quantile"),
+                ModelType("linear_quantile"),
                 "xgb_multioutput_quantile",
-                MLModelType("xgb_multioutput_quantile"),
+                ModelType("xgb_multioutput_quantile"),
                 "flatliner",
-                MLModelType("flatliner"),
+                ModelType("flatliner"),
             ]:
                 self.assertTrue(model.can_predict_quantiles)
             else:
@@ -39,7 +39,7 @@ def test_create_model_happy_flow(self):
 
     def test_create_model_quantile_model(self):
         # Test if quantile model is properly returned
-        model_type = MLModelType.XGB_QUANTILE
+        model_type = ModelType.XGB_QUANTILE
         quantiles = tuple([0.5, 0.2, 0.5])
         # Create relevant model
         model = ModelCreator.create_model(model_type, quantiles=quantiles)
diff --git a/test/unit/pipeline/test_pipeline_train_model.py b/test/unit/pipeline/test_pipeline_train_model.py
index f4b5b7491..2809816ef 100644
--- a/test/unit/pipeline/test_pipeline_train_model.py
+++ b/test/unit/pipeline/test_pipeline_train_model.py
@@ -15,7 +15,7 @@
 
 from openstef.data_classes.data_prep import DataPrepDataClass
 from openstef.data_classes.split_function import SplitFuncDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.exceptions import (
     InputDataInsufficientError,
     InputDataWrongColumnOrderError,
@@ -128,7 +128,7 @@ def test_train_model_pipeline_core_happy_flow(self):
         # Select 50 data points to speedup test
         train_input = self.train_input.iloc[:50, :]
         # Remove modeltypes which are optional, and add a dummy regressor
-        for model_type in list(MLModelType) + [__name__ + ".DummyRegressor"]:
+        for model_type in list(ModelType) + [__name__ + ".DummyRegressor"]:
             with self.subTest(model_type=model_type):
                 pj = self.pj
 
@@ -143,10 +143,10 @@ def test_train_model_pipeline_core_happy_flow(self):
                 model_specs.hyper_params["max_epochs"] = 1
 
                 # For Linear model we need to choose an imputation strategy to handle missing value
-                if model_type == MLModelType.LINEAR:
+                if model_type == ModelType.LINEAR:
                     model_specs.hyper_params["imputation_strategy"] = "mean"
 
-                if model_type == MLModelType.ARIMA:
+                if model_type == ModelType.ARIMA:
                     pj.data_prep_class = DataPrepDataClass(
                         klass=ARDataPreparation,
                         arguments={},
@@ -203,10 +203,10 @@ def test_train_model_pipeline_core_happy_flow_with_legacy_data_prep(self):
         """Test happy flow of the train model pipeline with the legacy data prep class."""
         # Select 50 data points to speedup test
         train_input = self.train_input.iloc[::50, :]
-        for model_type in list(MLModelType) + [__name__ + ".DummyRegressor"]:
+        for model_type in list(ModelType) + [__name__ + ".DummyRegressor"]:
             with self.subTest(model_type=model_type):
                 # Skip the arima model because it does not use legacy data prep
-                if model_type == MLModelType.ARIMA:
+                if model_type == ModelType.ARIMA:
                     continue
                 pj = self.pj
                 pj.data_prep_class = DataPrepDataClass(
@@ -224,7 +224,7 @@ def test_train_model_pipeline_core_happy_flow_with_legacy_data_prep(self):
                 model_specs.hyper_params["max_epochs"] = 1
 
                 # For Linear model we need to choose an imputation strategy to handle missing value
-                if model_type == MLModelType.LINEAR:
+                if model_type == ModelType.LINEAR:
                     model_specs.hyper_params["imputation_strategy"] = "mean"
 
                 model, report, modelspecs, _ = train_model_pipeline_core(