From 53082af10ac090e7f4cd971a99bdc2993fff5d5c Mon Sep 17 00:00:00 2001 From: Clara De Smet Date: Wed, 2 Oct 2024 15:14:41 +0200 Subject: [PATCH] fix: Renamed MLModelType to ModelType Signed-off-by: Clara De Smet --- openstef/enums.py | 3 +- openstef/model/model_creator.py | 40 +++++++++---------- openstef/model/objective.py | 14 +++---- openstef/model/objective_creator.py | 22 +++++----- openstef/tasks/calculate_kpi.py | 6 +-- openstef/tasks/create_components_forecast.py | 4 +- openstef/tasks/create_forecast.py | 4 +- openstef/tasks/optimize_hyperparameters.py | 4 +- openstef/tasks/split_forecast.py | 4 +- openstef/tasks/train_model.py | 4 +- test/unit/model/test_model_creator.py | 16 ++++---- .../pipeline/test_pipeline_train_model.py | 14 +++---- 12 files changed, 67 insertions(+), 68 deletions(-) diff --git a/openstef/enums.py b/openstef/enums.py index cef096121..3c0dcab12 100644 --- a/openstef/enums.py +++ b/openstef/enums.py @@ -4,8 +4,7 @@ from enum import Enum -# TODO replace this with ModelType (MLModelType == Machine Learning model type) -class MLModelType(Enum): +class ModelType(Enum): XGB = "xgb" XGB_QUANTILE = "xgb_quantile" XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile" diff --git a/openstef/model/model_creator.py b/openstef/model/model_creator.py index 837c6052a..40515fb6f 100644 --- a/openstef/model/model_creator.py +++ b/openstef/model/model_creator.py @@ -6,7 +6,7 @@ import structlog -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.model.regressors.arima import ARIMAOpenstfRegressor from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model from openstef.model.regressors.lgbm import LGBMOpenstfRegressor @@ -29,7 +29,7 @@ logger = structlog.get_logger(__name__) valid_model_kwargs = { - MLModelType.XGB: [ + ModelType.XGB: [ "n_estimators", "objective", "max_depth", @@ -60,7 +60,7 @@ "validate_parameters", "early_stopping_rounds", ], - MLModelType.LGB: [ + ModelType.LGB: [ "boosting_type", "objective", "num_leaves", @@ -82,7 +82,7 @@ "importance_type", "early_stopping_rounds", ], - MLModelType.XGB_QUANTILE: [ + ModelType.XGB_QUANTILE: [ "quantiles", "gamma", "colsample_bytree", @@ -91,7 +91,7 @@ "max_depth", "early_stopping_rounds", ], - MLModelType.XGB_MULTIOUTPUT_QUANTILE: [ + ModelType.XGB_MULTIOUTPUT_QUANTILE: [ "quantiles", "gamma", "colsample_bytree", @@ -101,15 +101,15 @@ "early_stopping_rounds", "arctan_smoothing", ], - MLModelType.LINEAR: [ + ModelType.LINEAR: [ "missing_values", "imputation_strategy", "fill_value", ], - MLModelType.FLATLINER: [ + ModelType.FLATLINER: [ "quantiles", ], - MLModelType.LINEAR_QUANTILE: [ + ModelType.LINEAR_QUANTILE: [ "alpha", "quantiles", "solver", @@ -117,7 +117,7 @@ "imputation_strategy", "fill_value", ], - MLModelType.ARIMA: [ + ModelType.ARIMA: [ "backtest_max_horizon", "order", "seasonal_order", @@ -131,18 +131,18 @@ class ModelCreator: # Set object mapping MODEL_CONSTRUCTORS = { - MLModelType.XGB: XGBOpenstfRegressor, - MLModelType.LGB: LGBMOpenstfRegressor, - MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor, - MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor, - MLModelType.LINEAR: LinearOpenstfRegressor, - MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor, - MLModelType.ARIMA: ARIMAOpenstfRegressor, - MLModelType.FLATLINER: FlatlinerRegressor, + ModelType.XGB: XGBOpenstfRegressor, + ModelType.LGB: LGBMOpenstfRegressor, + ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor, + ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor, + ModelType.LINEAR: LinearOpenstfRegressor, + ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor, + ModelType.ARIMA: ARIMAOpenstfRegressor, + ModelType.FLATLINER: FlatlinerRegressor, } @staticmethod - def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegressor: + def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor: """Create a machine learning model based on model type. Args: @@ -163,7 +163,7 @@ def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegres model_class = load_custom_model(model_type) valid_kwargs = model_class.valid_kwargs() else: - model_type = MLModelType(model_type) + model_type = ModelType(model_type) model_class = ModelCreator.MODEL_CONSTRUCTORS[model_type] valid_kwargs = valid_model_kwargs[model_type] # Check if model as imported @@ -174,7 +174,7 @@ def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegres "Please refer to the ReadMe for instructions" ) except ValueError as e: - valid_types = [t.value for t in MLModelType] + valid_types = [t.value for t in ModelType] raise NotImplementedError( f"No constructor for '{model_type}', " f"valid model_types are: {valid_types} " diff --git a/openstef/model/objective.py b/openstef/model/objective.py index 08c52a702..bebbf894e 100644 --- a/openstef/model/objective.py +++ b/openstef/model/objective.py @@ -8,7 +8,7 @@ import optuna import pandas as pd -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.metrics import metrics from openstef.metrics.reporter import Report, Reporter from openstef.model.regressors.regressor import OpenstfRegressor @@ -245,7 +245,7 @@ def get_default_values(cls) -> dict: class XGBRegressorObjective(RegressorObjective): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.model_type = MLModelType.XGB + self.model_type = ModelType.XGB # extend the parameters with the model specific ones per implementation def get_params(self, trial: optuna.trial.FrozenTrial) -> dict: @@ -282,7 +282,7 @@ def get_default_values(cls) -> dict: class LGBRegressorObjective(RegressorObjective): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.model_type = MLModelType.LGB + self.model_type = ModelType.LGB def get_params(self, trial: optuna.trial.FrozenTrial) -> dict: """Get parameters for LGB Regressor Objective with objective specific parameters. @@ -323,7 +323,7 @@ def get_pruning_callback(self, trial: optuna.trial.FrozenTrial): class XGBQuantileRegressorObjective(RegressorObjective): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.model_type = MLModelType.XGB_QUANTILE + self.model_type = ModelType.XGB_QUANTILE def get_params(self, trial: optuna.trial.FrozenTrial) -> dict: """Get parameters for XGBQuantile Regressor Objective with objective specific parameters. @@ -352,7 +352,7 @@ def get_pruning_callback(self, trial: optuna.trial.FrozenTrial): class XGBMultioutputQuantileRegressorObjective(RegressorObjective): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.model_type = MLModelType.XGB_QUANTILE + self.model_type = ModelType.XGB_QUANTILE def get_params(self, trial: optuna.trial.FrozenTrial) -> dict: """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters. @@ -382,7 +382,7 @@ def get_pruning_callback(self, trial: optuna.trial.FrozenTrial): class LinearRegressorObjective(RegressorObjective): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.model_type = MLModelType.LINEAR + self.model_type = ModelType.LINEAR def get_params(self, trial: optuna.trial.FrozenTrial) -> dict: """Get parameters for Linear Regressor Objective with objective specific parameters. @@ -405,7 +405,7 @@ def get_params(self, trial: optuna.trial.FrozenTrial) -> dict: class ARIMARegressorObjective(RegressorObjective): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.model_type = MLModelType.ARIMA + self.model_type = ModelType.ARIMA def get_params(self, trial: optuna.trial.FrozenTrial) -> dict: """Get parameters for ARIMA Regressor Objective with objective specific parameters. diff --git a/openstef/model/objective_creator.py b/openstef/model/objective_creator.py index 3a05e7a8d..6c980f125 100644 --- a/openstef/model/objective_creator.py +++ b/openstef/model/objective_creator.py @@ -4,7 +4,7 @@ from typing import Union -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.model.objective import ( ARIMARegressorObjective, LGBRegressorObjective, @@ -22,17 +22,17 @@ class ObjectiveCreator: OBJECTIVES = { - MLModelType.XGB: XGBRegressorObjective, - MLModelType.LGB: LGBRegressorObjective, - MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective, - MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective, - MLModelType.LINEAR: LinearRegressorObjective, - MLModelType.LINEAR_QUANTILE: LinearRegressorObjective, - MLModelType.ARIMA: ARIMARegressorObjective, + ModelType.XGB: XGBRegressorObjective, + ModelType.LGB: LGBRegressorObjective, + ModelType.XGB_QUANTILE: XGBQuantileRegressorObjective, + ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective, + ModelType.LINEAR: LinearRegressorObjective, + ModelType.LINEAR_QUANTILE: LinearRegressorObjective, + ModelType.ARIMA: ARIMARegressorObjective, } @staticmethod - def create_objective(model_type: Union[MLModelType, str]) -> RegressorObjective: + def create_objective(model_type: Union[ModelType, str]) -> RegressorObjective: """Create an objective function based on model type. Args: @@ -51,10 +51,10 @@ def create_objective(model_type: Union[MLModelType, str]) -> RegressorObjective: if is_custom_type(model_type): objective = create_custom_objective(model_type) else: - model_type = MLModelType(model_type) + model_type = ModelType(model_type) objective = ObjectiveCreator.OBJECTIVES[model_type] except ValueError as e: - valid_types = [t.value for t in MLModelType] + valid_types = [t.value for t in ModelType] raise NotImplementedError( f"No objective for '{model_type}', " f"valid model_types are: {valid_types}" diff --git a/openstef/tasks/calculate_kpi.py b/openstef/tasks/calculate_kpi.py index 6d38210cb..ea2c10255 100644 --- a/openstef/tasks/calculate_kpi.py +++ b/openstef/tasks/calculate_kpi.py @@ -29,7 +29,7 @@ import structlog from openstef.data_classes.prediction_job import PredictionJobDataClass -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.exceptions import NoPredictedLoadError, NoRealisedLoadError from openstef.metrics import metrics from openstef.settings import Settings @@ -42,7 +42,7 @@ THRESHOLD_OPTIMIZING = 0.50 -def main(model_type: MLModelType = None, config=None, database=None) -> None: +def main(model_type: ModelType = None, config=None, database=None) -> None: taskname = Path(__file__).name.replace(".py", "") if database is None or config is None: @@ -52,7 +52,7 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None: ) if model_type is None: - model_type = [ml.value for ml in MLModelType] + model_type = [ml.value for ml in ModelType] with TaskContext(taskname, config, database) as context: # Set start and end time diff --git a/openstef/tasks/create_components_forecast.py b/openstef/tasks/create_components_forecast.py index 3e6e35cc2..ee2b43dc4 100644 --- a/openstef/tasks/create_components_forecast.py +++ b/openstef/tasks/create_components_forecast.py @@ -29,7 +29,7 @@ import structlog from openstef.data_classes.prediction_job import PredictionJobDataClass -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.exceptions import ComponentForecastTooShortHorizonError from openstef.pipeline.create_component_forecast import ( create_components_forecast_pipeline, @@ -150,7 +150,7 @@ def main(config: object = None, database: object = None, **kwargs): ) with TaskContext(taskname, config, database) as context: - model_type = [ml.value for ml in MLModelType] + model_type = [ml.value for ml in ModelType] PredictionJobLoop( context, diff --git a/openstef/tasks/create_forecast.py b/openstef/tasks/create_forecast.py index fff469346..da2f72623 100644 --- a/openstef/tasks/create_forecast.py +++ b/openstef/tasks/create_forecast.py @@ -24,7 +24,7 @@ from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass -from openstef.enums import MLModelType, PipelineType +from openstef.enums import ModelType, PipelineType from openstef.exceptions import InputDataOngoingZeroFlatlinerError from openstef.pipeline.create_forecast import create_forecast_pipeline from openstef.tasks.utils.predictionjobloop import PredictionJobLoop @@ -129,7 +129,7 @@ def main(model_type=None, config=None, database=None, **kwargs): with TaskContext(taskname, config, database) as context: if model_type is None: - model_type = [ml.value for ml in MLModelType] + model_type = [ml.value for ml in ModelType] PredictionJobLoop(context, model_type=model_type).map( create_forecast_task, context, **kwargs diff --git a/openstef/tasks/optimize_hyperparameters.py b/openstef/tasks/optimize_hyperparameters.py index 6d3cfa6fb..304547204 100644 --- a/openstef/tasks/optimize_hyperparameters.py +++ b/openstef/tasks/optimize_hyperparameters.py @@ -20,7 +20,7 @@ from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass -from openstef.enums import MLModelType, PipelineType +from openstef.enums import ModelType, PipelineType from openstef.model.serializer import MLflowSerializer from openstef.monitoring import teams from openstef.pipeline.optimize_hyperparameters import optimize_hyperparameters_pipeline @@ -124,7 +124,7 @@ def main(config=None, database=None): ) with TaskContext(taskname, config, database) as context: - model_type = [ml.value for ml in MLModelType] + model_type = [ml.value for ml in ModelType] PredictionJobLoop(context, model_type=model_type).map( optimize_hyperparameters_task, context diff --git a/openstef/tasks/split_forecast.py b/openstef/tasks/split_forecast.py index 033313add..be1f83d0d 100644 --- a/openstef/tasks/split_forecast.py +++ b/openstef/tasks/split_forecast.py @@ -33,7 +33,7 @@ import openstef.monitoring.teams as monitoring from openstef.data_classes.prediction_job import PredictionJobDataClass -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.settings import Settings from openstef.tasks.utils.predictionjobloop import PredictionJobLoop from openstef.tasks.utils.taskcontext import TaskContext @@ -51,7 +51,7 @@ def main(config=None, database=None): ) with TaskContext(taskname, config, database) as context: - model_type = [ml.value for ml in MLModelType] + model_type = [ml.value for ml in ModelType] PredictionJobLoop( context, diff --git a/openstef/tasks/train_model.py b/openstef/tasks/train_model.py index a88d87cb3..5c6e21625 100644 --- a/openstef/tasks/train_model.py +++ b/openstef/tasks/train_model.py @@ -23,7 +23,7 @@ from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass -from openstef.enums import MLModelType, PipelineType +from openstef.enums import ModelType, PipelineType from openstef.exceptions import ( InputDataOngoingZeroFlatlinerError, SkipSaveTrainingForecasts, @@ -179,7 +179,7 @@ def main(model_type=None, config=None, database=None): ) if model_type is None: - model_type = [ml.value for ml in MLModelType] + model_type = [ml.value for ml in ModelType] taskname = Path(__file__).name.replace(".py", "") datetime_now = datetime.utcnow() diff --git a/test/unit/model/test_model_creator.py b/test/unit/model/test_model_creator.py index 5465325a3..9ff180aac 100644 --- a/test/unit/model/test_model_creator.py +++ b/test/unit/model/test_model_creator.py @@ -5,7 +5,7 @@ import sys from unittest import TestCase -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.model.model_creator import ModelCreator from openstef.model.regressors.regressor import OpenstfRegressor from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor @@ -14,22 +14,22 @@ class TestModelCreator(TestCase): def test_create_model_happy_flow(self): # Test happy flow (both str and enum model_type arguments) - valid_types = [t.value for t in MLModelType] + [t for t in MLModelType] + valid_types = [t.value for t in ModelType] + [t for t in ModelType] for model_type in valid_types: model = ModelCreator.create_model(model_type) self.assertIsInstance(model, OpenstfRegressor) self.assertTrue(hasattr(model, "can_predict_quantiles")) if model_type in [ "xgb_quantile", - MLModelType("xgb_quantile"), + ModelType("xgb_quantile"), "arima", - MLModelType("arima"), + ModelType("arima"), "linear_quantile", - MLModelType("linear_quantile"), + ModelType("linear_quantile"), "xgb_multioutput_quantile", - MLModelType("xgb_multioutput_quantile"), + ModelType("xgb_multioutput_quantile"), "flatliner", - MLModelType("flatliner"), + ModelType("flatliner"), ]: self.assertTrue(model.can_predict_quantiles) else: @@ -39,7 +39,7 @@ def test_create_model_happy_flow(self): def test_create_model_quantile_model(self): # Test if quantile model is properly returned - model_type = MLModelType.XGB_QUANTILE + model_type = ModelType.XGB_QUANTILE quantiles = tuple([0.5, 0.2, 0.5]) # Create relevant model model = ModelCreator.create_model(model_type, quantiles=quantiles) diff --git a/test/unit/pipeline/test_pipeline_train_model.py b/test/unit/pipeline/test_pipeline_train_model.py index f4b5b7491..2809816ef 100644 --- a/test/unit/pipeline/test_pipeline_train_model.py +++ b/test/unit/pipeline/test_pipeline_train_model.py @@ -15,7 +15,7 @@ from openstef.data_classes.data_prep import DataPrepDataClass from openstef.data_classes.split_function import SplitFuncDataClass -from openstef.enums import MLModelType +from openstef.enums import ModelType from openstef.exceptions import ( InputDataInsufficientError, InputDataWrongColumnOrderError, @@ -128,7 +128,7 @@ def test_train_model_pipeline_core_happy_flow(self): # Select 50 data points to speedup test train_input = self.train_input.iloc[:50, :] # Remove modeltypes which are optional, and add a dummy regressor - for model_type in list(MLModelType) + [__name__ + ".DummyRegressor"]: + for model_type in list(ModelType) + [__name__ + ".DummyRegressor"]: with self.subTest(model_type=model_type): pj = self.pj @@ -143,10 +143,10 @@ def test_train_model_pipeline_core_happy_flow(self): model_specs.hyper_params["max_epochs"] = 1 # For Linear model we need to choose an imputation strategy to handle missing value - if model_type == MLModelType.LINEAR: + if model_type == ModelType.LINEAR: model_specs.hyper_params["imputation_strategy"] = "mean" - if model_type == MLModelType.ARIMA: + if model_type == ModelType.ARIMA: pj.data_prep_class = DataPrepDataClass( klass=ARDataPreparation, arguments={}, @@ -203,10 +203,10 @@ def test_train_model_pipeline_core_happy_flow_with_legacy_data_prep(self): """Test happy flow of the train model pipeline with the legacy data prep class.""" # Select 50 data points to speedup test train_input = self.train_input.iloc[::50, :] - for model_type in list(MLModelType) + [__name__ + ".DummyRegressor"]: + for model_type in list(ModelType) + [__name__ + ".DummyRegressor"]: with self.subTest(model_type=model_type): # Skip the arima model because it does not use legacy data prep - if model_type == MLModelType.ARIMA: + if model_type == ModelType.ARIMA: continue pj = self.pj pj.data_prep_class = DataPrepDataClass( @@ -224,7 +224,7 @@ def test_train_model_pipeline_core_happy_flow_with_legacy_data_prep(self): model_specs.hyper_params["max_epochs"] = 1 # For Linear model we need to choose an imputation strategy to handle missing value - if model_type == MLModelType.LINEAR: + if model_type == ModelType.LINEAR: model_specs.hyper_params["imputation_strategy"] = "mean" model, report, modelspecs, _ = train_model_pipeline_core(