From b72335cd52bc21329baba20ad393ae4c219ac899 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 12:38:30 +0300
Subject: [PATCH] Add optuna searcher

---
 julearn/model_selection/_optuna_searcher.py  |  38 +++-
 .../tests/test_optuna_searcher.py            | 163 ++++++++++++++++++
 julearn/pipeline/pipeline_creator.py         |  16 +-
 .../pipeline/tests/test_pipeline_creator.py  |   2 +-
 4 files changed, 212 insertions(+), 7 deletions(-)
 create mode 100644 julearn/model_selection/tests/test_optuna_searcher.py

diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py
index 2a1619871..04ba9bac4 100644
--- a/julearn/model_selection/_optuna_searcher.py
+++ b/julearn/model_selection/_optuna_searcher.py
@@ -4,6 +4,7 @@
 # License: AGPL
 from typing import Any, Dict
 
+from ..utils import logger
 from .available_searchers import _recreate_reset_copy, register_searcher
 
 
@@ -38,12 +39,14 @@ def register_optuna_searcher():
 def _prepare_optuna_hyperparameters_distributions(
     params_to_tune: Dict[str, Any],
 ) -> Dict[str, Any]:
-    """Prepare hyperparameters distributions for RandomizedSearchCV.
+    """Prepare hyperparameters distributions for OptunaSearchCV.
 
-    This method replaces tuples with distributions for RandomizedSearchCV
+    This method replaces tuples with distributions for OptunaSearchCV
     following the skopt convention. That is, if a parameter is a tuple
     with 3 elements, the first two elements are the bounds of the
-    distribution and the third element is the type of distribution.
+    distribution and the third element is the type of distribution. If
+    the last element is "categorical", the parameter is treated as
+    categorical and all preceding elements are the choices.
 
     Parameters
     ----------
@@ -61,16 +64,43 @@ def _prepare_optuna_hyperparameters_distributions(
         if isinstance(v, tuple) and len(v) == 3:
             if v[2] == "uniform":
                 if isinstance(v[0], int) and isinstance(v[1], int):
+                    logger.info(
+                        f"Hyperparameter {k} is uniform int "
+                        f"[{v[0]}, {v[1]}]"
+                    )
                     out[k] = optd.IntDistribution(v[0], v[1], log=False)
                 else:
+                    logger.info(
+                        f"Hyperparameter {k} is uniform float [{v[0]}, {v[1]}]"
+                    )
                     out[k] = optd.FloatDistribution(v[0], v[1], log=False)
-            elif v[2] in ("loguniform", "log-uniform"):
+            elif v[2] == "log-uniform":
                 if isinstance(v[0], int) and isinstance(v[1], int):
+                    logger.info(
+                        f"Hyperparameter {k} is log-uniform int "
+                        f"[{v[0]}, {v[1]}]"
+                    )
                     out[k] = optd.IntDistribution(v[0], v[1], log=True)
                 else:
+                    logger.info(
+                        f"Hyperparameter {k} is log-uniform float "
+                        f"[{v[0]}, {v[1]}]"
+                    )
                     out[k] = optd.FloatDistribution(v[0], v[1], log=True)
+            elif v[2] == "categorical":
+                logger.info(f"Hyperparameter {k} is categorical with 2 "
+                            f"options: {v[0]} and {v[1]}")
+                out[k] = optd.CategoricalDistribution((v[0], v[1]))
             else:
                 out[k] = v
+        elif (
+            isinstance(v, tuple)
+            and isinstance(v[-1], str)
+            and v[-1] == "categorical"
+        ):
+            logger.info(f"Hyperparameter {k} is categorical: {v[:-1]}")
+            out[k] = optd.CategoricalDistribution(v[:-1])
         else:
+            logger.info(f"Hyperparameter {k} is used as-is: {v}")
             out[k] = v
     return out
diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py
new file mode 100644
index 000000000..10f52cdee
--- /dev/null
+++ b/julearn/model_selection/tests/test_optuna_searcher.py
@@ -0,0 +1,163 @@
+"""Provides tests for the optuna searcher."""
+
+# Authors: Federico Raimondo
+# License: AGPL
+from typing import Any, Dict
+
+import optuna.distributions as optd
+import pytest
+
+from julearn.model_selection._optuna_searcher import (
+    _prepare_optuna_hyperparameters_distributions,
+)
+
+
+@pytest.mark.parametrize(
+    "params_to_tune, expected_types, expected_dist",
+    [
+        (
+            {
+                "n_components": (0.2, 0.7, "uniform"),
+                "n_neighbors": (1.0, 10.0, "log-uniform"),
+            },
+            ("float", "float"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "n_components": (1, 20, "uniform"),
+                "n_neighbors": (1, 10, "log-uniform"),
+            },
+            ("int", "int"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "options": (True, False, "categorical"),
+                "more_options": ("a", "b", "c", "d", "categorical"),
+            },
+            (None, None),
+            ("categorical", "categorical"),
+        ),
+        (
+            {
+                "n_components": optd.FloatDistribution(0.2, 0.7, log=False),
+                "n_neighbors": optd.FloatDistribution(1.0, 10.0, log=True),
+            },
+            ("float", "float"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "n_components": optd.IntDistribution(1, 20, log=False),
+                "n_neighbors": optd.IntDistribution(1, 10, log=True),
+            },
+            ("int", "int"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "options": optd.CategoricalDistribution([True, False]),
+                "more_options": optd.CategoricalDistribution(
+                    ("a", "b", "c", "d"),
+                ),
+            },
+            (None, None),
+            ("categorical", "categorical"),
+        ),
+    ],
+)
+def test__prepare_optuna_hyperparameters_distributions(
+    params_to_tune: Dict[str, Any],
+    expected_types: tuple,
+    expected_dist: tuple,
+) -> None:
+    """Test the _prepare_optuna_hyperparameters_distributions function.
+
+    Parameters
+    ----------
+    params_to_tune : dict
+        The parameters to tune.
+    expected_types : tuple
+        The expected types of each parameter.
+    expected_dist : tuple
+        The expected distributions of each parameter.
+
+    """
+    new_params = _prepare_optuna_hyperparameters_distributions(params_to_tune)
+    for i, (k, v) in enumerate(new_params.items()):
+        if expected_dist[i] == "uniform":
+            if expected_types[i] == "int":
+                assert isinstance(v, optd.IntDistribution)
+                assert not v.log
+                if isinstance(params_to_tune[k], tuple):
+                    assert v.low == params_to_tune[k][0]  # type: ignore
+                    assert v.high == params_to_tune[k][1]  # type: ignore
+                else:
+                    assert isinstance(params_to_tune[k], optd.IntDistribution)
+                    assert v.low == params_to_tune[k].low  # type: ignore
+                    assert v.high == params_to_tune[k].high  # type: ignore
+                    assert not params_to_tune[k].log  # type: ignore
+            else:
+                assert isinstance(v, optd.FloatDistribution)
+                assert not v.log
+                if isinstance(params_to_tune[k], tuple):
+                    assert v.low == params_to_tune[k][0]  # type: ignore
+                    assert v.high == params_to_tune[k][1]  # type: ignore
+                else:
+                    assert isinstance(
+                        params_to_tune[k], optd.FloatDistribution
+                    )
+                    assert v.low == params_to_tune[k].low  # type: ignore
+                    assert v.high == params_to_tune[k].high  # type: ignore
+                    assert not params_to_tune[k].log  # type: ignore
+        elif expected_dist[i] == "log-uniform":
+            if expected_types[i] == "int":
+                assert isinstance(v, optd.IntDistribution)
+                assert v.log
+                if isinstance(params_to_tune[k], tuple):
+                    assert v.low == params_to_tune[k][0]  # type: ignore
+                    assert v.high == params_to_tune[k][1]  # type: ignore
+                else:
+                    assert isinstance(params_to_tune[k], optd.IntDistribution)
+                    assert v.low == params_to_tune[k].low  # type: ignore
+                    assert v.high == params_to_tune[k].high  # type: ignore
+                    assert params_to_tune[k].log  # type: ignore
+            else:
+                assert isinstance(v, optd.FloatDistribution)
+                assert v.log
+                if isinstance(params_to_tune[k], tuple):
+                    assert v.low == params_to_tune[k][0]  # type: ignore
+                    assert v.high == params_to_tune[k][1]  # type: ignore
+                else:
+                    assert isinstance(
+                        params_to_tune[k], optd.FloatDistribution
+                    )
+                    assert v.low == params_to_tune[k].low  # type: ignore
+                    assert v.high == params_to_tune[k].high  # type: ignore
+                    assert params_to_tune[k].log  # type: ignore
+        elif expected_dist[i] == "categorical":
+            assert isinstance(v, optd.CategoricalDistribution)
+            if isinstance(params_to_tune[k], tuple):
+                assert all(
+                    x in v.choices
+                    for x in params_to_tune[k][:-1]  # type: ignore
+                )
+                assert all(
+                    x in params_to_tune[k][:-1]  # type: ignore
+                    for x in v.choices
+                )
+            else:
+                assert isinstance(
+                    params_to_tune[k], optd.CategoricalDistribution
+                )
+                assert all(
+                    x in v.choices
+                    for x in params_to_tune[k].choices  # type: ignore
+                )
+                assert all(
+                    x in params_to_tune[k].choices  # type: ignore
+                    for x in v.choices
+                )
+        else:
+            pytest.fail("Invalid distribution type")
diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py
index d98c5b47f..d5616e255 100644
--- a/julearn/pipeline/pipeline_creator.py
+++ b/julearn/pipeline/pipeline_creator.py
@@ -17,6 +17,9 @@
 from ..model_selection._optuna_searcher import (
     _prepare_optuna_hyperparameters_distributions,
 )
+from ..model_selection._skopt_searcher import (
+    _prepare_skopt_hyperparameters_distributions,
+)
 from ..model_selection.available_searchers import get_searcher, list_searchers
 from ..models import get_model, list_models
 from ..prepare import prepare_search_params
@@ -901,7 +904,7 @@ def _prepare_hyperparameters_distributions(
         if isinstance(v, tuple) and len(v) == 3:
             if v[2] == "uniform":
                 mod_params_to_tune[k] = stats.uniform(v[0], v[1])
-            elif v[2] in ("loguniform", "log-uniform"):
+            elif v[2] == "log-uniform":
                 mod_params_to_tune[k] = stats.loguniform(v[0], v[1])
             else:
                 mod_params_to_tune[k] = v
@@ -998,8 +1001,17 @@ def _prepare_hyperparameter_tuning(
             _prepare_hyperparameters_distributions(p)
             for p in params_to_tune
         ]
+    elif search.__name__ == "BayesSearchCV":
+        if isinstance(params_to_tune, dict):
+            params_to_tune = _prepare_skopt_hyperparameters_distributions(
+                params_to_tune
+            )
+        else:
+            params_to_tune = [
+                _prepare_skopt_hyperparameters_distributions(p)
+                for p in params_to_tune
+            ]
     elif search.__name__ == "OptunaSearchCV":
-
         if isinstance(params_to_tune, dict):
             params_to_tune = _prepare_optuna_hyperparameters_distributions(
                 params_to_tune
diff --git a/julearn/pipeline/tests/test_pipeline_creator.py b/julearn/pipeline/tests/test_pipeline_creator.py
index 9d049d3ed..746646f14 100644
--- a/julearn/pipeline/tests/test_pipeline_creator.py
+++ b/julearn/pipeline/tests/test_pipeline_creator.py
@@ -321,7 +321,7 @@ def _compare_param_grids(a: Dict, b: Dict) -> None:
         if hasattr(val, "rvs"):
             assert val.args[0] == b[key][0]
             assert val.args[1] == b[key][1]
-            if b[key][2] in ["log-uniform", "loguniform"]:
+            if b[key][2] == "log-uniform":
                 assert val.dist.name == "loguniform"
             elif b[key][2] == "uniform":
                 assert val.dist.name == "uniform"
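
For reference, below is a minimal sketch of the tuple convention that the new
`_prepare_optuna_hyperparameters_distributions` helper implements. It assumes
the patch is applied and `optuna` is installed; the step and parameter names
(`svm__C`, `pca__n_components`, `svm__kernel`) are hypothetical:

    from julearn.model_selection._optuna_searcher import (
        _prepare_optuna_hyperparameters_distributions,
    )

    # (low, high, "kind") tuples; a longer tuple ending in "categorical"
    # lists the choices themselves.
    params_to_tune = {
        "svm__C": (0.01, 100.0, "log-uniform"),   # -> FloatDistribution(log=True)
        "pca__n_components": (2, 20, "uniform"),  # -> IntDistribution(log=False)
        "svm__kernel": ("linear", "rbf", "poly", "categorical"),
    }

    distributions = _prepare_optuna_hyperparameters_distributions(params_to_tune)
    for name, dist in distributions.items():
        print(name, type(dist).__name__)
    # Per the conversion rules above:
    #   svm__C FloatDistribution
    #   pca__n_components IntDistribution
    #   svm__kernel CategoricalDistribution

Values that are already `optuna.distributions` objects fall through the final
`else` branch unchanged, which is what the last three parametrized test cases
exercise.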