From 83ce5bd5e39a38a22f329c87ecb050696879152b Mon Sep 17 00:00:00 2001
From: Fede
Date: Thu, 2 May 2024 19:24:42 +0300
Subject: [PATCH 01/16] Add optuna searcher

---
 julearn/api.py                              |  4 +-
 julearn/conftest.py                         | 25 +++++-
 julearn/model_selection/__init__.py         |  3 +
 julearn/model_selection/_optuna_searcher.py | 76 +++++++++++++++++++
 .../tests/test_available_searchers.py       | 13 ++++
 julearn/pipeline/pipeline_creator.py        | 17 ++++-
 .../pipeline/tests/test_pipeline_creator.py | 43 ++++++++++-
 7 files changed, 175 insertions(+), 6 deletions(-)
 create mode 100644 julearn/model_selection/_optuna_searcher.py

diff --git a/julearn/api.py b/julearn/api.py
index 039254dfc..7602216f1 100644
--- a/julearn/api.py
+++ b/julearn/api.py
@@ -136,8 +136,8 @@ def run_cross_validation(  # noqa: C901
         the following keys:
 
         * 'kind': The kind of search algorithm to use, e.g.:
-          'grid', 'random' or 'bayes'. Can be any valid julearn searcher name
-          or scikit-learn compatible searcher.
+          'grid', 'random', 'bayes' or 'optuna'. Can be any valid julearn
+          searcher name or scikit-learn compatible searcher.
         * 'cv': If a searcher is going to be used, the cross-validation
           splitting strategy to use. Defaults to same CV as for the model
           evaluation.
diff --git a/julearn/conftest.py b/julearn/conftest.py
index 9da707fc4..cb4c2868d 100644
--- a/julearn/conftest.py
+++ b/julearn/conftest.py
@@ -270,7 +270,7 @@ def search_params(request: FixtureRequest) -> Optional[Dict]:
     scope="function",
 )
 def bayes_search_params(request: FixtureRequest) -> Optional[Dict]:
-    """Return different search_params argument for BayesSearchCV. 
+    """Return different search_params argument for BayesSearchCV.
 
     Parameters
     ----------
@@ -286,6 +286,29 @@ def bayes_search_params(request: FixtureRequest) -> Optional[Dict]:
     return request.param
 
 
+@fixture(
+    params=[
+        {"kind": "optuna", "n_trials": 10, "cv": 3},
+        {"kind": "optuna", "timeout": 20},
+    ],
+    scope="function",
+)
+def optuna_search_params(request: FixtureRequest) -> Optional[Dict]:
+    """Return different search_params argument for OptunaSearchCV.
+
+    Parameters
+    ----------
+    request : pytest.FixtureRequest
+        The request object.
+
+    Returns
+    -------
+    dict or None
+        A dictionary with the search_params argument.
+
+    """
+
+    return request.param
 _tuning_params = {
     "zscore": {"with_mean": [True, False]},
diff --git a/julearn/model_selection/__init__.py b/julearn/model_selection/__init__.py
index 2e1eab839..01356d0e6 100644
--- a/julearn/model_selection/__init__.py
+++ b/julearn/model_selection/__init__.py
@@ -17,5 +17,8 @@
 )
 from ._skopt_searcher import register_bayes_searcher
+from ._optuna_searcher import register_optuna_searcher
+
 
 register_bayes_searcher()
+register_optuna_searcher()
diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py
new file mode 100644
index 000000000..a35167480
--- /dev/null
+++ b/julearn/model_selection/_optuna_searcher.py
@@ -0,0 +1,76 @@
+"""Module for registering the OptunaSearchCV class from optuna-integration."""
+
+# Authors: Federico Raimondo
+# License: AGPL
+from typing import Any, Dict
+
+from .available_searchers import _recreate_reset_copy, register_searcher
+
+
+try:
+    from optuna_integration.sklearn import OptunaSearchCV
+    import optuna.distributions as od
+except ImportError:
+    from sklearn.model_selection._search import BaseSearchCV
+
+    class OptunaSearchCV(BaseSearchCV):
+        """Dummy class for OptunaSearchCV that raises ImportError.
+
+        This class is used to raise an ImportError when OptunaSearchCV is
+        requested but optuna and optuna-integration are not installed.
+
+        """
+
+        def __init__(*args, **kwargs):
+            raise ImportError(
+                "OptunaSearchCV requires optuna and optuna-integration to be "
+                "installed."
+            )
+
+
+def register_optuna_searcher():
+    register_searcher("optuna", OptunaSearchCV, "param_distributions")
+
+    # Update the "reset copy" of available searchers
+    _recreate_reset_copy()
+
+
+def _prepare_optuna_hyperparameters_distributions(
+    params_to_tune: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Prepare hyperparameters distributions for RandomizedSearchCV.
+
+    This method replaces tuples with distributions for RandomizedSearchCV
+    following the skopt convention. That is, if a parameter is a tuple
+    with 3 elements, the first two elements are the bounds of the
+    distribution and the third element is the type of distribution.
+
+    Parameters
+    ----------
+    params_to_tune : dict
+        The parameters to tune.
+
+    Returns
+    -------
+    dict
+        The modified parameters to tune.
+
+    """
+    out = {}
+    for k, v in params_to_tune.items():
+        if isinstance(v, tuple) and len(v) == 3:
+            if v[2] == "uniform":
+                if isinstance(v[0], int) and isinstance(v[1], int):
+                    out[k] = od.IntDistribution(v[0], v[1], log=False)
+                else:
+                    out[k] = od.FloatDistribution(v[0], v[1], log=False)
+            elif v[2] in ("loguniform", "log-uniform"):
+                if isinstance(v[0], int) and isinstance(v[1], int):
+                    out[k] = od.IntDistribution(v[0], v[1], log=True)
+                else:
+                    out[k] = od.FloatDistribution(v[0], v[1], log=True)
+            else:
+                out[k] = v
+        else:
+            out[k] = v
+    return out
diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py
index f88348867..7a7d2b913 100644
--- a/julearn/model_selection/tests/test_available_searchers.py
+++ b/julearn/model_selection/tests/test_available_searchers.py
@@ -62,6 +62,9 @@ def test_get_searcher() -> None:
     out = get_searcher("bayes")
     assert out.__name__ == "BayesSearchCV"
 
+    out = get_searcher("optuna")
+    assert out.__name__ == "OptunaSearchCV"
+
 
 def test_get_searcher_params_attr() -> None:
     """Test getting the params attribute of a searcher."""
@@ -74,6 +77,9 @@ def test_get_searcher_params_attr() -> None:
     out = get_searcher_params_attr("bayes")
     assert out == "search_spaces"
 
+    out = get_searcher_params_attr("optuna")
+    assert out == "param_distributions"
+
 
 @pytest.mark.nodeps
 def test_get_searchers_noskopt() -> None:
@@ -81,3 +87,10 @@ def test_get_searchers_noskopt() -> None:
     out = get_searcher("bayes")
     with pytest.raises(ImportError, match="BayesSearchCV requires"):
         out()  # type: ignore
+
+@pytest.mark.nodeps
+def test_get_searchers_nooptuna() -> None:
+    """Test getting a searcher without optuna."""
+    out = get_searcher("optuna")
+    with pytest.raises(ImportError, match="OptunaSearchCV requires"):
+        out()  # type: ignore
diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py
index 652e060d2..0ecb639ec 100644
--- a/julearn/pipeline/pipeline_creator.py
+++ b/julearn/pipeline/pipeline_creator.py
@@ -15,6 +15,9 @@
 
 from ..base import ColumnTypes, ColumnTypesLike, JuTransformer, WrapModel
 from ..model_selection.available_searchers import get_searcher, list_searchers
+from ..model_selection._optuna_searcher import (
+    _prepare_optuna_hyperparameters_distributions
+)
 from ..models import get_model, list_models
 from ..prepare import prepare_search_params
 from ..transformers import (
@@ -928,8 +931,8 @@ def _prepare_hyperparameter_tuning(
The parameters for the search. The following keys are accepted: * 'kind': The kind of search algorithm to use e.g.: - 'grid', 'random' or 'bayes'. All valid julearn searchers can be - entered. + 'grid', 'random', 'bayes' or 'optuna'. All valid julearn searchers + can be entered. * 'cv': If search is going to be used, the cross-validation splitting strategy to use. Defaults to same CV as for the model evaluation. @@ -995,7 +998,17 @@ def _prepare_hyperparameter_tuning( _prepare_hyperparameters_distributions(p) for p in params_to_tune ] + elif search.__name__ == "OptunaSearchCV": + if isinstance(params_to_tune, dict): + params_to_tune = _prepare_optuna_hyperparameters_distributions( + params_to_tune + ) + else: + params_to_tune = [ + _prepare_optuna_hyperparameters_distributions(p) + for p in params_to_tune + ] cv_inner = check_cv(cv_inner) # type: ignore logger.info(f"Using inner CV scheme {cv_inner}") search_params["cv"] = cv_inner diff --git a/julearn/pipeline/tests/test_pipeline_creator.py b/julearn/pipeline/tests/test_pipeline_creator.py index e299b1008..9d049d3ed 100644 --- a/julearn/pipeline/tests/test_pipeline_creator.py +++ b/julearn/pipeline/tests/test_pipeline_creator.py @@ -227,7 +227,7 @@ def test_hyperparameter_tuning_bayes( get_tuning_params: Callable, bayes_search_params: Dict[str, List], ) -> None: - """Test that the pipeline hyperparameter tuning works as expected. + """Test that the pipeline hyperparameter tuning (bayes) works as expected. Parameters ---------- @@ -259,6 +259,47 @@ def test_hyperparameter_tuning_bayes( assert pipeline.search_spaces == param_grid # type: ignore +def test_hyperparameter_tuning_optuna( + X_types_iris: Dict[str, List[str]], # noqa: N803 + model: str, + preprocess: Union[str, List[str]], + problem_type: str, + get_tuning_params: Callable, + optuna_search_params: Dict[str, List], +) -> None: + """Test that the pipeline hyperparameter tuning (optuna) works as expected. + + Parameters + ---------- + X_types_iris : dict + The iris dataset features types. + model : str + The model to test. + preprocess : str or list of str + The preprocessing steps to test. + problem_type : str + The problem type to test. + get_tuning_params : Callable + A function that returns the tuning hyperparameters for a given step. + optuna_search_params : dict of str and list + The parameters for the search. + + """ + optuna_integration = pytest.importorskip("optuna_integration") + OptunaSearchCV = optuna_integration.OptunaSearchCV + + pipeline, param_grid = _hyperparam_tuning_base_test( + X_types_iris, + model, + preprocess, + problem_type, + get_tuning_params, + optuna_search_params, + ) + assert isinstance(pipeline, OptunaSearchCV) + assert pipeline.param_distributions == param_grid # type: ignore + + def _compare_param_grids(a: Dict, b: Dict) -> None: """Compare two param grids. 
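With the searcher registered, selecting it from user code only needs the new ``kind``. A minimal sketch of the intended usage (the DataFrame ``df``, its column names and the trial count are illustrative, mirroring the ``optuna_search_params`` fixture above):

    from julearn import run_cross_validation
    from julearn.pipeline import PipelineCreator

    # Tune C over a log-uniform range; the tuple convention is converted
    # into an optuna distribution by
    # _prepare_optuna_hyperparameters_distributions.
    creator = PipelineCreator(problem_type="classification")
    creator.add("zscore")
    creator.add("svm", C=(0.01, 10, "log-uniform"))

    scores, model = run_cross_validation(
        X=["feature_a", "feature_b"],  # illustrative column names
        y="target",
        data=df,  # assumed: a pandas DataFrame with features and target
        model=creator,
        return_estimator="all",
        search_params={"kind": "optuna", "n_trials": 10, "cv": 3},
    )
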
From 030a439e40fdc14c2555ab136fd25e9f925c6532 Mon Sep 17 00:00:00 2001 From: Fede Date: Thu, 2 May 2024 19:26:59 +0300 Subject: [PATCH 02/16] Fix linters --- julearn/model_selection/_optuna_searcher.py | 10 +++++----- julearn/pipeline/pipeline_creator.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py index a35167480..2a1619871 100644 --- a/julearn/model_selection/_optuna_searcher.py +++ b/julearn/model_selection/_optuna_searcher.py @@ -8,8 +8,8 @@ try: + import optuna.distributions as optd from optuna_integration.sklearn import OptunaSearchCV - import optuna.distributions as od except ImportError: from sklearn.model_selection._search import BaseSearchCV @@ -61,14 +61,14 @@ def _prepare_optuna_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: if v[2] == "uniform": if isinstance(v[0], int) and isinstance(v[1], int): - out[k] = od.IntDistribution(v[0], v[1], log=False) + out[k] = optd.IntDistribution(v[0], v[1], log=False) else: - out[k] = od.FloatDistribution(v[0], v[1], log=False) + out[k] = optd.FloatDistribution(v[0], v[1], log=False) elif v[2] in ("loguniform", "log-uniform"): if isinstance(v[0], int) and isinstance(v[1], int): - out[k] = od.IntDistribution(v[0], v[1], log=True) + out[k] = optd.IntDistribution(v[0], v[1], log=True) else: - out[k] = od.FloatDistribution(v[0], v[1], log=True) + out[k] = optd.FloatDistribution(v[0], v[1], log=True) else: out[k] = v else: diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index 0ecb639ec..d98c5b47f 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -14,10 +14,10 @@ from sklearn.pipeline import Pipeline from ..base import ColumnTypes, ColumnTypesLike, JuTransformer, WrapModel -from ..model_selection.available_searchers import get_searcher, list_searchers from ..model_selection._optuna_searcher import ( - _prepare_optuna_hyperparameters_distributions + _prepare_optuna_hyperparameters_distributions, ) +from ..model_selection.available_searchers import get_searcher, list_searchers from ..models import get_model, list_models from ..prepare import prepare_search_params from ..transformers import ( From ac0471a8f94f6122cb152c4b77e56cb4ec128797 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:37:48 +0300 Subject: [PATCH 03/16] Add Optuna + distributions doc --- examples/99_docs/run_hyperparameters_docs.py | 142 ++++++++++++++++++- 1 file changed, 140 insertions(+), 2 deletions(-) diff --git a/examples/99_docs/run_hyperparameters_docs.py b/examples/99_docs/run_hyperparameters_docs.py index 4b7372abb..2213bc5a2 100644 --- a/examples/99_docs/run_hyperparameters_docs.py +++ b/examples/99_docs/run_hyperparameters_docs.py @@ -253,8 +253,9 @@ # hyperparameters values. # # Other searchers that ``julearn`` provides are the -# :class:`~sklearn.model_selection.RandomizedSearchCV` and -# :class:`~skopt.BayesSearchCV`. +# :class:`~sklearn.model_selection.RandomizedSearchCV`, +# :class:`~skopt.BayesSearchCV` and +# :class:`~optuna.integration.sklearn.OptunaSearchCV`. # # The randomized searcher # (:class:`~sklearn.model_selection.RandomizedSearchCV`) is similar to the @@ -274,6 +275,12 @@ # :class:`~skopt.BayesSearchCV` documentation, including how to specify # the prior distributions of the hyperparameters. 
 #
+# The Optuna searcher (:class:`~optuna.integration.sklearn.OptunaSearchCV`)\
+# uses the Optuna library to find the best hyperparameter set. Optuna is a
+# hyperparameter optimization framework that has several algorithms to find
+# the best hyperparameter set. For more information, see the
+# `Optuna`_ documentation.
+#
 # We can specify the kind of searcher and its parametrization, by setting the
 # ``search_params`` parameter in the :func:`.run_cross_validation` function.
 # For example, we can use the
@@ -369,6 +376,137 @@
 )
 pprint(model_tuned.best_params_)
 
+###############################################################################
+# An example using the optuna searcher is shown below. The searcher is
+# specified as ``"optuna"`` and the hyperparameters are given as a dictionary
+# with the hyperparameters to tune and their distributions, just as for the
+# bayesian searcher. However, the optuna searcher behaviour is controlled by a
+# :class:`~optuna.study.Study` object. This object can be passed to the
+# searcher using the ``study`` parameter in the ``search_params`` dictionary.
+#
+# .. important::
+#     The optuna searcher requires that all the hyperparameters are specified
+#     as distributions, even the categorical ones.
+#
+# We first modify the pipeline creator so the ``select_k`` parameter is
+# specified as a distribution. As an example, we use a categorical
+# distribution for the ``class_weight`` hyperparameter, trying the
+# ``"balanced"`` and ``None`` values.
+
+creator = PipelineCreator(problem_type="classification")
+creator.add("zscore")
+creator.add("select_k", k=(2, 4, "uniform"))
+creator.add(
+    "svm",
+    C=(0.01, 10, "log-uniform"),
+    gamma=(1e-3, 1e-1, "log-uniform"),
+    class_weight=("balanced", None, "categorical")
+)
+
+###############################################################################
+# We can now use the optuna searcher with 10 trials and 3-fold cross-validation.
+
+import optuna
+
+study = optuna.create_study(
+    direction="maximize",
+    study_name="optuna-concept",
+    load_if_exists=True,
+)
+
+search_params = {
+    "kind": "optuna",
+    "study": study,
+    "cv": 3,
+}
+scores_tuned, model_tuned = run_cross_validation(
+    X=X,
+    y=y,
+    data=df,
+    X_types=X_types,
+    model=creator,
+    return_estimator="all",
+    search_params=search_params,
+)
+
+print(
+    "Scores with best hyperparameter using 10 iterations of "
+    f"optuna and 3-fold CV: {scores_tuned['test_score'].mean()}"
+)
+pprint(model_tuned.best_params_)
+
+###############################################################################
+#
+# Specifying distributions
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The hyperparameters can be specified as distributions for the randomized
+# searcher, bayesian searcher and optuna searcher. The distributions can be
+# specified either using a toolbox-specific method or using a tuple
+# convention with the following format: ``(low, high, distribution)`` where
+# the distribution can be either ``"log-uniform"`` or ``"uniform"``, or
+# ``(a, b, c, d, ..., "categorical")`` where ``a``, ``b``, ``c``, ``d``, etc.
+# are the possible categorical values for the hyperparameter.
+#
+# For example, we can specify the ``C`` and ``gamma`` hyperparameters of the
+# :class:`~sklearn.svm.SVC` as log-uniform distributions, while keeping
+# the ``with_mean`` parameter of the
+# :class:`~sklearn.preprocessing.StandardScaler` as a categorical parameter
+# with two options.
+ + +creator = PipelineCreator(problem_type="classification") +creator.add("zscore", with_mean=(True, False, "categorical")) +creator.add( + "svm", + C=(0.01, 10, "log-uniform"), + gamma=(1e-3, 1e-1, "log-uniform"), +) + +############################################################################### +# While this will work for any of the ``random``, ``bayes`` or ``optuna`` +# searcher options, it is important to note that both ``bayes`` and ``optuna`` +# searchers accept further parameters to specify distributions. For example, +# the ``bayes`` searcher distributions are defined using the +# :class:`~skopt.space.space.Categorical`, :class:`~skopt.space.space.Integer` +# and :class:`~skopt.space.space.Real`. +# +# For example, we can define a log-uniform distribution with base 2 for the +# ``C`` hyperparameter of the :class:`~sklearn.svm.SVC` model: +from skopt.space import Real +creator = PipelineCreator(problem_type="classification") +creator.add("zscore", with_mean=(True, False, "categorical")) +creator.add( + "svm", + C=Real(0.01, 10, prior="log-uniform", base=2), + gamma=(1e-3, 1e-1, "log-uniform"), +) + +############################################################################### +# For the optuna searcher, the distributions are defined using the +# :class:`~optuna.distributions.CategoricalDistribution`, +# :class:`~optuna.distributions.FloatDistribution` and +# :class:`~optuna.distributions.IntDistribution`. +# +# +# For example, we can define a uniform distribution from 0.5 to 0.9 with a 0.05 +# step for the ``n_components`` of a :class:`~sklearn.decomposition.PCA` +# transformer, while keeping a log-uniform distribution for the ``C`` and +# ``gamma`` hyperparameters of the :class:`~sklearn.svm.SVC` model. +from optuna.distributions import FloatDistribution +creator = PipelineCreator(problem_type="classification") +creator.add("zscore") +creator.add( + "pca", + n_components=FloatDistribution(0.5, 0.9, step=0.05), +) +creator.add( + "svm", + C=FloatDistribution(0.01, 10, log=True), + gamma=(1e-3, 1e-1, "log-uniform"), +) + + ############################################################################### # From afa1d27013883cde98e82f5715dd9dd2baadf138 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:38:04 +0300 Subject: [PATCH 04/16] Configure tests + deps --- pyproject.toml | 4 ++++ tox.ini | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 320d256d0..1ab96f1ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,10 @@ viz = [ ] skopt = ["scikit-optimize>=0.10.0,<0.11"] +optuna = [ + "optuna>=3.6.0,<3.7", + "optuna_integration>=3.6.0,<3.7", +] # Add all optional functional dependencies (skip deslib until its fixed) # This does not include dev/docs building dependencies diff --git a/tox.ini b/tox.ini index 017e63fcf..1ea44e360 100644 --- a/tox.ini +++ b/tox.ini @@ -15,6 +15,8 @@ deps = pytest seaborn scikit-optimize>=0.10.0,<0.11 + optuna>=3.6.0,<3.7 + optuna_integration>=3.6.0,<3.7 commands = pytest {toxinidir}/julearn @@ -42,6 +44,8 @@ deps = bokeh>=3.0.0 param scikit-optimize>=0.10.0,<0.11 + optuna>=3.6.0,<3.7 + optuna_integration>=3.6.0,<3.7 commands = pytest -vv {toxinidir}/julearn @@ -64,6 +68,8 @@ deps = bokeh>=3.0.0 param scikit-optimize>=0.10.0,<0.11 + optuna>=3.6.0,<3.7 + optuna_integration>=3.6.0,<3.7 commands = pytest --cov={envsitepackagesdir}/julearn --cov=./julearn --cov-report=xml --cov-report=term -vv From f7fc6fe8475659530475539d0e4c08470d982a36 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 
2024 12:38:15 +0300 Subject: [PATCH 05/16] Add optuna links in docs --- docs/conf.py | 1 + docs/links.inc | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index da1a0edbb..095c89843 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -161,6 +161,7 @@ "joblib": ("https://joblib.readthedocs.io/en/latest/", None), "scipy": ("https://docs.scipy.org/doc/scipy/", None), "skopt": ("https://scikit-optimize.readthedocs.io/en/latest", None), + "optuna": ("https://optuna.readthedocs.io/en/stable", None), } diff --git a/docs/links.inc b/docs/links.inc index c4be5e9c0..654f50fb2 100644 --- a/docs/links.inc +++ b/docs/links.inc @@ -41,3 +41,4 @@ .. _`DESlib`: https://github.com/scikit-learn-contrib/DESlib .. _`scikit-optimize`: https://scikit-optimize.readthedocs.io/en/stable/ +.. _`Optuna`: https://optuna.org \ No newline at end of file From b72335cd52bc21329baba20ad393ae4c219ac899 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:38:30 +0300 Subject: [PATCH 06/16] Add optuna searcher --- julearn/model_selection/_optuna_searcher.py | 38 +++- .../tests/test_optuna_searcher.py | 163 ++++++++++++++++++ julearn/pipeline/pipeline_creator.py | 16 +- .../pipeline/tests/test_pipeline_creator.py | 2 +- 4 files changed, 212 insertions(+), 7 deletions(-) create mode 100644 julearn/model_selection/tests/test_optuna_searcher.py diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py index 2a1619871..04ba9bac4 100644 --- a/julearn/model_selection/_optuna_searcher.py +++ b/julearn/model_selection/_optuna_searcher.py @@ -4,6 +4,7 @@ # License: AGPL from typing import Any, Dict +from ..utils import logger from .available_searchers import _recreate_reset_copy, register_searcher @@ -38,12 +39,14 @@ def register_optuna_searcher(): def _prepare_optuna_hyperparameters_distributions( params_to_tune: Dict[str, Any], ) -> Dict[str, Any]: - """Prepare hyperparameters distributions for RandomizedSearchCV. + """Prepare hyperparameters distributions for OptunaSearchCV. - This method replaces tuples with distributions for RandomizedSearchCV + This method replaces tuples with distributions for OptunaSearchCV following the skopt convention. That is, if a parameter is a tuple with 3 elements, the first two elements are the bounds of the - distribution and the third element is the type of distribution. + distribution and the third element is the type of distribution. In case + the last element is "categorical", the parameter is considered + categorical and all the previous elements are the choices. 
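+
+    For example, ``(1, 10, "uniform")`` (two ints) maps to an
+    ``IntDistribution``, ``(0.01, 10.0, "log-uniform")`` maps to a
+    log-scaled ``FloatDistribution`` and ``("a", "b", "categorical")``
+    maps to a ``CategoricalDistribution`` over ``("a", "b")``.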
Parameters ---------- @@ -61,16 +64,43 @@ def _prepare_optuna_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: if v[2] == "uniform": if isinstance(v[0], int) and isinstance(v[1], int): + logger.info( + f"Hyperparameter {k} is uniform integer " + f"[{v[0]}, {v[1]}]" + ) out[k] = optd.IntDistribution(v[0], v[1], log=False) else: + logger.info( + f"Hyperparameter {k} is uniform float [{v[0]}, {v[1]}]" + ) out[k] = optd.FloatDistribution(v[0], v[1], log=False) - elif v[2] in ("loguniform", "log-uniform"): + elif v[2] == "log-uniform": if isinstance(v[0], int) and isinstance(v[1], int): + logger.info( + f"Hyperparameter {k} is log-uniform int " + f"[{v[0]}, {v[1]}]" + ) out[k] = optd.IntDistribution(v[0], v[1], log=True) else: + logger.info( + f"Hyperparameter {k} is log-uniform float " + f"[{v[0]}, {v[1]}]" + ) out[k] = optd.FloatDistribution(v[0], v[1], log=True) + elif v[2] == "categorical": + logger.info(f"Hyperparameter {k} is categorical with 2 " + f"options: [{v[0]} and {v[1]}]") + out[k] = optd.CategoricalDistribution((v[0], v[1])) else: out[k] = v + elif ( + isinstance(v, tuple) + and isinstance(v[-1], str) + and v[-1] == "categorical" + ): + logger.info(f"Hyperparameter {k} is categorical [{v[:-1]}]") + out[k] = optd.CategoricalDistribution(v[:-1]) else: + logger.info(f"Hyperparameter {k} as is {v}") out[k] = v return out diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py new file mode 100644 index 000000000..10f52cdee --- /dev/null +++ b/julearn/model_selection/tests/test_optuna_searcher.py @@ -0,0 +1,163 @@ +"""Provides tests for the optuna searcher.""" + +# Authors: Federico Raimondo +# License: AGPL +from typing import Dict + +import optuna.distributions as optd +import pytest + +from julearn.model_selection._optuna_searcher import ( + _prepare_optuna_hyperparameters_distributions, +) + + +@pytest.mark.parametrize( + "params_to_tune,expected_types, expected_dist", + [ + ( + { + "n_components": (0.2, 0.7, "uniform"), + "n_neighbors": (1.0, 10.0, "log-uniform"), + }, + ("float", "float"), + ("uniform", "log-uniform"), + ), + ( + { + "n_components": (1, 20, "uniform"), + "n_neighbors": (1, 10, "log-uniform"), + }, + ("int", "int"), + ("uniform", "log-uniform"), + ), + ( + { + "options": (True, False, "categorical"), + "more_options": ("a", "b", "c", "d", "categorical"), + }, + (None, None), + ("categorical", "categorical"), + ), + ( + { + "n_components": optd.FloatDistribution(0.2, 0.7, log=False), + "n_neighbors": optd.FloatDistribution(1.0, 10.0, log=True), + }, + ("float", "float"), + ("uniform", "log-uniform"), + ), + ( + { + "n_components": optd.IntDistribution(1, 20, log=False), + "n_neighbors": optd.IntDistribution(1, 10, log=True), + }, + ("int", "int"), + ("uniform", "log-uniform"), + ), + ( + { + "options": optd.CategoricalDistribution([True, False]), + "more_options": optd.CategoricalDistribution( + ("a", "b", "c", "d"), + ), + }, + (None, None), + ("categorical", "categorical"), + ), + ], +) +def test__prepare_optuna_hyperparameters_distributions( + params_to_tune: Dict[str, Dict[str, tuple]], + expected_types: tuple, + expected_dist: tuple, +) -> None: + """Test the _prepare_optuna_hyperparameters_distributions function. + + Parameters + ---------- + params_to_tune : dict + The parameters to tune. + expected_types : tuple + The expected types of each parameter. + expected_dist : tuple + The expected distributions of each parameter. 
+ + """ + new_params = _prepare_optuna_hyperparameters_distributions(params_to_tune) + for i, (k, v) in enumerate(new_params.items()): + if expected_dist[i] == "uniform": + if expected_types[i] == "int": + assert isinstance(v, optd.IntDistribution) + assert not v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], optd.IntDistribution) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert not params_to_tune[k].log # type: ignore + else: + assert isinstance(v, optd.FloatDistribution) + assert not v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance( + params_to_tune[k], optd.FloatDistribution + ) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert not params_to_tune[k].log # type: ignore + elif expected_dist[i] == "log-uniform": + if expected_types[i] == "int": + assert isinstance(v, optd.IntDistribution) + assert v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], optd.IntDistribution) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert params_to_tune[k].log # type: ignore + else: + assert isinstance(v, optd.FloatDistribution) + assert v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance( + params_to_tune[k], optd.FloatDistribution + ) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert params_to_tune[k].log # type: ignore + elif expected_dist[i] == "categorical": + assert isinstance(v, optd.CategoricalDistribution) + if isinstance(params_to_tune[k], tuple): + assert all( + x in v.choices + for x in params_to_tune[k][:-1] # type: ignore + ) + assert all( + x in params_to_tune[k][:-1] # type: ignore + for x in v.choices + ) + else: + assert isinstance( + params_to_tune[k], optd.CategoricalDistribution + ) + assert all( + x in v.choices + for x in params_to_tune[k].choices # type: ignore + ) + assert all( + x in params_to_tune[k].choices # type: ignore + for x in v.choices + ) + else: + pytest.fail("Invalid distribution type") diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index d98c5b47f..d5616e255 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -17,6 +17,9 @@ from ..model_selection._optuna_searcher import ( _prepare_optuna_hyperparameters_distributions, ) +from ..model_selection._skopt_searcher import ( + _prepare_skopt_hyperparameters_distributions, +) from ..model_selection.available_searchers import get_searcher, list_searchers from ..models import get_model, list_models from ..prepare import prepare_search_params @@ -901,7 +904,7 @@ def _prepare_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: if v[2] == "uniform": mod_params_to_tune[k] = stats.uniform(v[0], v[1]) - elif v[2] in ("loguniform", "log-uniform"): + elif v[2] == "log-uniform": 
                 mod_params_to_tune[k] = stats.loguniform(v[0], v[1])
             else:
                 mod_params_to_tune[k] = v
@@ -998,8 +1001,17 @@ def _prepare_hyperparameter_tuning(
                     _prepare_hyperparameters_distributions(p)
                     for p in params_to_tune
                 ]
+        elif search.__name__ == "BayesSearchCV":
+            if isinstance(params_to_tune, dict):
+                params_to_tune = _prepare_skopt_hyperparameters_distributions(
+                    params_to_tune
+                )
+            else:
+                params_to_tune = [
+                    _prepare_skopt_hyperparameters_distributions(p)
+                    for p in params_to_tune
+                ]
         elif search.__name__ == "OptunaSearchCV":
-
             if isinstance(params_to_tune, dict):
                 params_to_tune = _prepare_optuna_hyperparameters_distributions(
                     params_to_tune
diff --git a/julearn/pipeline/tests/test_pipeline_creator.py b/julearn/pipeline/tests/test_pipeline_creator.py
index 9d049d3ed..746646f14 100644
--- a/julearn/pipeline/tests/test_pipeline_creator.py
+++ b/julearn/pipeline/tests/test_pipeline_creator.py
@@ -321,7 +321,7 @@ def _compare_param_grids(a: Dict, b: Dict) -> None:
         if hasattr(val, "rvs"):
             assert val.args[0] == b[key][0]
             assert val.args[1] == b[key][1]
-            if b[key][2] in ["log-uniform", "loguniform"]:
+            if b[key][2] == "log-uniform":
                 assert val.dist.name == "loguniform"
             elif b[key][2] == "uniform":
                 assert val.dist.name == "uniform"

From a8c58615a654be66018075bcc2c6de21815e6dd9 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 12:38:43 +0300
Subject: [PATCH 07/16] Modify skopt searcher distributions

---
 julearn/model_selection/_skopt_searcher.py |  62 ++++++++
 .../tests/test_skopt_searcher.py           | 133 ++++++++++++++++++
 2 files changed, 195 insertions(+)
 create mode 100644 julearn/model_selection/tests/test_skopt_searcher.py

diff --git a/julearn/model_selection/_skopt_searcher.py b/julearn/model_selection/_skopt_searcher.py
index e0904b10c..3bd246a3b 100644
--- a/julearn/model_selection/_skopt_searcher.py
+++ b/julearn/model_selection/_skopt_searcher.py
@@ -2,11 +2,14 @@
 
 # Authors: Federico Raimondo
 # License: AGPL
+from typing import Any, Dict
 
+from ..utils import logger
 from .available_searchers import _recreate_reset_copy, register_searcher
 
 
 try:
+    import skopt.space as sksp
     from skopt import BayesSearchCV
 except ImportError:
     from sklearn.model_selection._search import BaseSearchCV
@@ -30,3 +33,62 @@ def register_bayes_searcher():
 
     # Update the "reset copy" of available searchers
     _recreate_reset_copy()
+
+
+
+def _prepare_skopt_hyperparameters_distributions(
+    params_to_tune: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Prepare hyperparameters distributions for BayesSearchCV.
+
+    This method replaces tuples with distributions for BayesSearchCV
+    following the skopt convention. That is, if a parameter is a tuple
+    with 3 elements, the first two elements are the bounds of the
+    distribution and the third element is the type of distribution. In case
+    the last element is "categorical", the parameter is considered
+    categorical and all the previous elements are the choices.
+
+    Parameters
+    ----------
+    params_to_tune : dict
+        The parameters to tune.
+
+    Returns
+    -------
+    dict
+        The modified parameters to tune.
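+
+    For example, ``(1, 20, "uniform")`` becomes
+    ``Integer(1, 20, prior="uniform")``, ``(0.01, 10.0, "log-uniform")``
+    becomes ``Real(0.01, 10.0, prior="log-uniform")`` and
+    ``("a", "b", "categorical")`` becomes ``Categorical(("a", "b"))``.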
+
+    """
+    out = {}
+    for k, v in params_to_tune.items():
+        if isinstance(v, tuple) and len(v) == 3:
+            prior = v[2]
+            if prior == "categorical":
+                logger.info(f"Hyperparameter {k} is categorical with 2 "
+                            f"options: [{v[0]} and {v[1]}]")
+                out[k] = sksp.Categorical(v[:-1])
+            elif isinstance(v[0], int) and isinstance(v[1], int):
+                logger.info(
+                    f"Hyperparameter {k} is {prior} integer "
+                    f"[{v[0]}, {v[1]}]"
+                )
+                out[k] = sksp.Integer(v[0], v[1], prior=prior)
+            elif isinstance(v[0], float) and isinstance(v[1], float):
+                logger.info(
+                    f"Hyperparameter {k} is {prior} float "
+                    f"[{v[0]}, {v[1]}]"
+                )
+                out[k] = sksp.Real(v[0], v[1], prior=prior)
+            else:
+                logger.info(f"Hyperparameter {k} as is {v}")
+                out[k] = v
+        elif (
+            isinstance(v, tuple)
+            and isinstance(v[-1], str)
+            and v[-1] == "categorical"
+        ):
+            out[k] = sksp.Categorical(v[:-1])
+        else:
+            logger.info(f"Hyperparameter {k} as is {v}")
+            out[k] = v
+    return out
diff --git a/julearn/model_selection/tests/test_skopt_searcher.py b/julearn/model_selection/tests/test_skopt_searcher.py
new file mode 100644
index 000000000..9e8c67fb0
--- /dev/null
+++ b/julearn/model_selection/tests/test_skopt_searcher.py
@@ -0,0 +1,133 @@
+"""Provides tests for the bayes searcher."""
+
+# Authors: Federico Raimondo
+# License: AGPL
+from typing import Dict
+
+import pytest
+import skopt.space as sksp
+
+from julearn.model_selection._skopt_searcher import (
+    _prepare_skopt_hyperparameters_distributions,
+)
+
+
+@pytest.mark.parametrize(
+    "params_to_tune,expected_types, expected_dist",
+    [
+        (
+            {
+                "n_components": (0.2, 0.7, "uniform"),
+                "n_neighbors": (1.0, 10.0, "log-uniform"),
+            },
+            ("float", "float"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "n_components": (1, 20, "uniform"),
+                "n_neighbors": (1, 10, "log-uniform"),
+            },
+            ("int", "int"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "options": (True, False, "categorical"),
+                "more_options": ("a", "b", "c", "d", "categorical"),
+            },
+            (None, None),
+            ("categorical", "categorical"),
+        ),
+        (
+            {
+                "n_components": sksp.Real(0.2, 0.7, prior="uniform"),
+                "n_neighbors": sksp.Real(1.0, 10.0, prior="log-uniform"),
+            },
+            ("float", "float"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "n_components": sksp.Integer(1, 20, prior="uniform"),
+                "n_neighbors": sksp.Integer(1, 10, prior="log-uniform"),
+            },
+            ("int", "int"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "options": sksp.Categorical([True, False]),
+                "more_options": sksp.Categorical(
+                    ("a", "b", "c", "d"),
+                ),
+            },
+            (None, None),
+            ("categorical", "categorical"),
+        ),
+    ],
+)
+def test__prepare_skopt_hyperparameters_distributions(
+    params_to_tune: Dict[str, Dict[str, tuple]],
+    expected_types: tuple,
+    expected_dist: tuple,
+) -> None:
+    """Test the _prepare_skopt_hyperparameters_distributions function.
+
+    Parameters
+    ----------
+    params_to_tune : dict
+        The parameters to tune.
+    expected_types : tuple
+        The expected types of each parameter.
+    expected_dist : tuple
+        The expected distributions of each parameter.
+ + """ + new_params = _prepare_skopt_hyperparameters_distributions(params_to_tune) + for i, (k, v) in enumerate(new_params.items()): + if expected_types[i] == "int": + assert isinstance(v, sksp.Integer) + assert v.prior == expected_dist[i] + if isinstance(params_to_tune[k], tuple): + assert v.bounds[0] == params_to_tune[k][0] # type: ignore + assert v.bounds[1] == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], sksp.Integer) + assert v.bounds[0] == params_to_tune[k].bounds[0] # type: ignore + assert v.bounds[1] == params_to_tune[k].bounds[1] # type: ignore + assert params_to_tune[k].prior == v.prior # type: ignore + elif expected_types[i] == "float": + assert isinstance(v, sksp.Real) + assert v.prior == expected_dist[i] + if isinstance(params_to_tune[k], tuple): + assert v.bounds[0] == params_to_tune[k][0] # type: ignore + assert v.bounds[1] == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], sksp.Real) + assert v.bounds[0] == params_to_tune[k].bounds[0] # type: ignore + assert v.bounds[1] == params_to_tune[k].bounds[1] # type: ignore + assert params_to_tune[k].prior == v.prior # type: ignore + elif expected_dist[i] == "categorical": + assert isinstance(v, sksp.Categorical) + if isinstance(params_to_tune[k], tuple): + assert all( + x in v.categories + for x in params_to_tune[k][:-1] # type: ignore + ) + assert all( + x in params_to_tune[k][:-1] # type: ignore + for x in v.categories + ) + else: + assert isinstance(params_to_tune[k], sksp.Categorical) + assert all( + x in v.categories + for x in params_to_tune[k].categories # type: ignore + ) + assert all( + x in params_to_tune[k].categories # type: ignore + for x in v.categories + ) + else: + pytest.fail("Invalid distribution type") From 877b0ad818a0082d851a4fb44cb2a4fa8425c5b1 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:41:35 +0300 Subject: [PATCH 08/16] Add changes for #262 --- docs/changes/newsfragments/262.enh | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/newsfragments/262.enh diff --git a/docs/changes/newsfragments/262.enh b/docs/changes/newsfragments/262.enh new file mode 100644 index 000000000..a42d5bc5c --- /dev/null +++ b/docs/changes/newsfragments/262.enh @@ -0,0 +1 @@ +Add :class:`~optuna.integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_ From 195ac71209cd385177ae8fdb1ca98fa551ac7b2e Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:57:16 +0300 Subject: [PATCH 09/16] Update dependencies for optuna --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1ab96f1ed..56e1f455e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,8 @@ docs = [ "numpydoc>=1.5.0,<1.6", "towncrier<24", "scikit-optimize>=0.10.0,<0.11", + "optuna>=3.6.0,<3.7", + "optuna_integration>=3.6.0,<3.7", ] deslib = ["deslib>=0.3.5,<0.4"] viz = [ @@ -76,7 +78,7 @@ optuna = [ # Add all optional functional dependencies (skip deslib until its fixed) # This does not include dev/docs building dependencies -all = ["julearn[viz,skopt]"] +all = ["julearn[viz,skopt,optuna]"] ################ # Tool configs # From f0c9ade7a62a4161f03da0c6ec6fdaed02ad2108 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 13:29:01 +0300 Subject: [PATCH 10/16] Fix intersphinx for optuna_integration --- 
docs/changes/newsfragments/262.enh | 2 +- docs/conf.py | 4 ++++ examples/99_docs/run_hyperparameters_docs.py | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/changes/newsfragments/262.enh b/docs/changes/newsfragments/262.enh index a42d5bc5c..d512a7673 100644 --- a/docs/changes/newsfragments/262.enh +++ b/docs/changes/newsfragments/262.enh @@ -1 +1 @@ -Add :class:`~optuna.integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_ +Add :class:`~optuna_integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_ diff --git a/docs/conf.py b/docs/conf.py index 095c89843..28d256772 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -162,6 +162,10 @@ "scipy": ("https://docs.scipy.org/doc/scipy/", None), "skopt": ("https://scikit-optimize.readthedocs.io/en/latest", None), "optuna": ("https://optuna.readthedocs.io/en/stable", None), + "optuna_integration": ( + "https://optuna-integration.readthedocs.io/en/stable", + None, + ), } diff --git a/examples/99_docs/run_hyperparameters_docs.py b/examples/99_docs/run_hyperparameters_docs.py index 2213bc5a2..b7dfff7ac 100644 --- a/examples/99_docs/run_hyperparameters_docs.py +++ b/examples/99_docs/run_hyperparameters_docs.py @@ -255,7 +255,7 @@ # Other searchers that ``julearn`` provides are the # :class:`~sklearn.model_selection.RandomizedSearchCV`, # :class:`~skopt.BayesSearchCV` and -# :class:`~optuna.integration.sklearn.OptunaSearchCV`. +# :class:`~optuna_integration.sklearn.OptunaSearchCV`. # # The randomized searcher # (:class:`~sklearn.model_selection.RandomizedSearchCV`) is similar to the @@ -275,7 +275,7 @@ # :class:`~skopt.BayesSearchCV` documentation, including how to specify # the prior distributions of the hyperparameters. # -# The Optuna searcher (:class:`~optuna.integration.sklearn.OptunaSearchCV`)\ +# The Optuna searcher (:class:`~optuna_integration.sklearn.OptunaSearchCV`)\ # uses the Optuna library to find the best hyperparameter set. Optuna is a # hyperparameter optimization framework that has several algorithms to find # the best hyperparameter set. 
For more information, see the

From 67eadb7ad5ceffbd6c39339492488435f67aea7c Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 13:45:02 +0300
Subject: [PATCH 11/16] Skip test when deps are not met

---
 julearn/model_selection/tests/test_optuna_searcher.py | 3 ++-
 julearn/model_selection/tests/test_skopt_searcher.py  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py
index 10f52cdee..0e0014a83 100644
--- a/julearn/model_selection/tests/test_optuna_searcher.py
+++ b/julearn/model_selection/tests/test_optuna_searcher.py
@@ -4,7 +4,6 @@
 # License: AGPL
 from typing import Dict
 
-import optuna.distributions as optd
 import pytest
 
 from julearn.model_selection._optuna_searcher import (
@@ -12,6 +11,8 @@
 )
 
 
+optd = pytest.importorskip("optuna.distributions")
+
 @pytest.mark.parametrize(
     "params_to_tune,expected_types, expected_dist",
     [
diff --git a/julearn/model_selection/tests/test_skopt_searcher.py b/julearn/model_selection/tests/test_skopt_searcher.py
index 9e8c67fb0..be9a9321b 100644
--- a/julearn/model_selection/tests/test_skopt_searcher.py
+++ b/julearn/model_selection/tests/test_skopt_searcher.py
@@ -5,13 +5,14 @@
 from typing import Dict
 
 import pytest
-import skopt.space as sksp
 
 from julearn.model_selection._skopt_searcher import (
     _prepare_skopt_hyperparameters_distributions,
 )
 
+sksp = pytest.importorskip("skopt.space")
+
 
 @pytest.mark.parametrize(

From 365b615fbfa8bd2806eddd24916e655569a21668 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 16:18:11 +0300
Subject: [PATCH 12/16] Update changelog for #262

---
 docs/changes/newsfragments/262.doc     | 1 +
 docs/changes/newsfragments/262.enh     | 2 +-
 docs/changes/newsfragments/262.feature | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 docs/changes/newsfragments/262.doc
 create mode 100644 docs/changes/newsfragments/262.feature

diff --git a/docs/changes/newsfragments/262.doc b/docs/changes/newsfragments/262.doc
new file mode 100644
index 000000000..ce5b75c38
--- /dev/null
+++ b/docs/changes/newsfragments/262.doc
@@ -0,0 +1 @@
+Update documentation on Hyperparameter Tuning by `Fede Raimondo_`
\ No newline at end of file
diff --git a/docs/changes/newsfragments/262.enh b/docs/changes/newsfragments/262.enh
index d512a7673..dc5ac2746 100644
--- a/docs/changes/newsfragments/262.enh
+++ b/docs/changes/newsfragments/262.enh
@@ -1 +1 @@
-Add :class:`~optuna_integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_
+Refactor how hyperparameters' distributions are specified by `Fede Raimondo`_
diff --git a/docs/changes/newsfragments/262.feature b/docs/changes/newsfragments/262.feature
new file mode 100644
index 000000000..16be6fbf6
--- /dev/null
+++ b/docs/changes/newsfragments/262.feature
@@ -0,0 +1 @@
+Add :class:`~optuna_integration.sklearn.OptunaSearchCV` to the list of available searchers as ``optuna`` by `Fede Raimondo`_

From 2caf4c3281efe2ca76033d3d0478cd0147555a44 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 16:18:26 +0300
Subject: [PATCH 13/16] Update docs on hyperparameter tuning

---
 examples/99_docs/run_hyperparameters_docs.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/99_docs/run_hyperparameters_docs.py 
b/examples/99_docs/run_hyperparameters_docs.py
index b7dfff7ac..fec76a8d7 100644
--- a/examples/99_docs/run_hyperparameters_docs.py
+++ b/examples/99_docs/run_hyperparameters_docs.py
@@ -275,7 +275,7 @@
 # :class:`~skopt.BayesSearchCV` documentation, including how to specify
 # the prior distributions of the hyperparameters.
 #
-# The Optuna searcher (:class:`~optuna_integration.sklearn.OptunaSearchCV`)\
+# The Optuna searcher (:class:`~optuna_integration.sklearn.OptunaSearchCV`)
 # uses the Optuna library to find the best hyperparameter set. Optuna is a
 # hyperparameter optimization framework that has several algorithms to find
 # the best hyperparameter set. For more information, see the
@@ -402,6 +402,7 @@
     gamma=(1e-3, 1e-1, "log-uniform"),
     class_weight=("balanced", None, "categorical")
 )
+print(creator)
 
 ###############################################################################
 # We can now use the optuna searcher with 10 trials and 3-fold cross-validation.
@@ -462,6 +463,7 @@
     C=(0.01, 10, "log-uniform"),
     gamma=(1e-3, 1e-1, "log-uniform"),
 )
+print(creator)
 
 ###############################################################################
 # While this will work for any of the ``random``, ``bayes`` or ``optuna``
@@ -481,6 +483,7 @@
     C=Real(0.01, 10, prior="log-uniform", base=2),
     gamma=(1e-3, 1e-1, "log-uniform"),
 )
+print(creator)
 
 ###############################################################################
 # For the optuna searcher, the distributions are defined using the
@@ -505,7 +508,7 @@
     C=FloatDistribution(0.01, 10, log=True),
     gamma=(1e-3, 1e-1, "log-uniform"),
 )
-
+print(creator)
 
 ###############################################################################
 #

From ed2e508c910b6ab4c9ae084c3f1004cbe79ce5e4 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 16:19:12 +0300
Subject: [PATCH 14/16] Address comments in #262

---
 julearn/api.py                              | 15 +++++++--
 julearn/model_selection/_optuna_searcher.py |  1 +
 julearn/model_selection/_skopt_searcher.py  | 11 ++++---
 .../tests/test_available_searchers.py       | 32 ++++++++++++-------
 .../tests/test_optuna_searcher.py           |  7 ++--
 .../tests/test_skopt_searcher.py            |  9 +++---
 julearn/pipeline/pipeline_creator.py        | 15 +++++++--
 7 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/julearn/api.py b/julearn/api.py
index 7602216f1..130b57e0a 100644
--- a/julearn/api.py
+++ b/julearn/api.py
@@ -135,9 +135,18 @@ def run_cross_validation(  # noqa: C901
         Additional parameters in case Hyperparameter Tuning is performed, with
         the following keys:
 
-        * 'kind': The kind of search algorithm to use, e.g.:
-          'grid', 'random', 'bayes' or 'optuna'. Can be any valid julearn
-          searcher name or scikit-learn compatible searcher.
+        * 'kind': The kind of search algorithm to use. Valid options are:
+
+          * ``"grid"`` : :class:`~sklearn.model_selection.GridSearchCV`
+          * ``"random"`` :
+            :class:`~sklearn.model_selection.RandomizedSearchCV`
+          * ``"bayes"`` : :class:`~skopt.BayesSearchCV`
+          * ``"optuna"`` :
+            :class:`~optuna_integration.sklearn.OptunaSearchCV`
+          * user-registered searcher name : see
+            :func:`~julearn.model_selection.register_searcher`
+          * ``scikit-learn``-compatible searcher
+
         * 'cv': If a searcher is going to be used, the cross-validation
           splitting strategy to use. Defaults to same CV as for the model
           evaluation.
diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py index 04ba9bac4..a05801fc2 100644 --- a/julearn/model_selection/_optuna_searcher.py +++ b/julearn/model_selection/_optuna_searcher.py @@ -2,6 +2,7 @@ # Authors: Federico Raimondo # License: AGPL + from typing import Any, Dict from ..utils import logger diff --git a/julearn/model_selection/_skopt_searcher.py b/julearn/model_selection/_skopt_searcher.py index 3bd246a3b..8fbf53431 100644 --- a/julearn/model_selection/_skopt_searcher.py +++ b/julearn/model_selection/_skopt_searcher.py @@ -2,6 +2,7 @@ # Authors: Federico Raimondo # License: AGPL + from typing import Any, Dict from ..utils import logger @@ -35,7 +36,6 @@ def register_bayes_searcher(): _recreate_reset_copy() - def _prepare_skopt_hyperparameters_distributions( params_to_tune: Dict[str, Any], ) -> Dict[str, Any]: @@ -64,8 +64,10 @@ def _prepare_skopt_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: prior = v[2] if prior == "categorical": - logger.info(f"Hyperparameter {k} is categorical with 2 " - f"options: [{v[0]} and {v[1]}]") + logger.info( + f"Hyperparameter {k} is categorical with 2 " + f"options: [{v[0]} and {v[1]}]" + ) out[k] = sksp.Categorical(v[:-1]) elif isinstance(v[0], int) and isinstance(v[1], int): logger.info( @@ -75,8 +77,7 @@ def _prepare_skopt_hyperparameters_distributions( out[k] = sksp.Integer(v[0], v[1], prior=prior) elif isinstance(v[0], float) and isinstance(v[1], float): logger.info( - f"Hyperparameter {k} is {prior} float " - f"[{v[0]}, {v[1]}]" + f"Hyperparameter {k} is {prior} float " f"[{v[0]}, {v[1]}]" ) out[k] = sksp.Real(v[0], v[1], prior=prior) else: diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py index 7a7d2b913..97764c94f 100644 --- a/julearn/model_selection/tests/test_available_searchers.py +++ b/julearn/model_selection/tests/test_available_searchers.py @@ -66,19 +66,28 @@ def test_get_searcher() -> None: assert out.__name__ == "OptunaSearchCV" -def test_get_searcher_params_attr() -> None: - """Test getting the params attribute of a searcher.""" - out = get_searcher_params_attr("grid") - assert out == "param_grid" - - out = get_searcher_params_attr("random") - assert out == "param_distributions" +@pytest.mark.parametrize( + "searcher,expected", + [ + ("grid", "param_grid"), + ("random", "param_distributions"), + ("bayes", "search_spaces"), + ("optuna", "param_distributions"), + ], +) +def test_get_searcher_params_attr(searcher: str, expected: str) -> None: + """Test getting the params attribute of a searcher. - out = get_searcher_params_attr("bayes") - assert out == "search_spaces" + Parameters + ---------- + searcher : str + The searcher name. + expected : str + The expected attribute name. 
- out = get_searcher_params_attr("optuna") - assert out == "param_distributions" + """ + out = get_searcher_params_attr(searcher) + assert out == expected @pytest.mark.nodeps @@ -88,6 +97,7 @@ def test_get_searchers_noskopt() -> None: with pytest.raises(ImportError, match="BayesSearchCV requires"): out() # type: ignore + @pytest.mark.nodeps def test_get_searchers_nooptuna() -> None: """Test getting a searcher without optuna.""" diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py index 0e0014a83..ab9b7fa11 100644 --- a/julearn/model_selection/tests/test_optuna_searcher.py +++ b/julearn/model_selection/tests/test_optuna_searcher.py @@ -2,7 +2,8 @@ # Authors: Federico Raimondo # License: AGPL -from typing import Dict + +from typing import Any, Dict, Tuple import pytest @@ -69,8 +70,8 @@ ], ) def test__prepare_optuna_hyperparameters_distributions( - params_to_tune: Dict[str, Dict[str, tuple]], - expected_types: tuple, + params_to_tune: Dict[str, Dict[str, Any]], + expected_types: Tuple, expected_dist: tuple, ) -> None: """Test the _prepare_optuna_hyperparameters_distributions function. diff --git a/julearn/model_selection/tests/test_skopt_searcher.py b/julearn/model_selection/tests/test_skopt_searcher.py index be9a9321b..ea3598a87 100644 --- a/julearn/model_selection/tests/test_skopt_searcher.py +++ b/julearn/model_selection/tests/test_skopt_searcher.py @@ -2,7 +2,8 @@ # Authors: Federico Raimondo # License: AGPL -from typing import Dict + +from typing import Any, Dict, Tuple import pytest @@ -69,9 +70,9 @@ ], ) def test__prepare_skopt_hyperparameters_distributions( - params_to_tune: Dict[str, Dict[str, tuple]], - expected_types: tuple, - expected_dist: tuple, + params_to_tune: Dict[str, Dict[str, Any]], + expected_types: Tuple, + expected_dist: Tuple, ) -> None: """Test the _prepare_skopt_hyperparameters_distributions function. diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index d5616e255..979bf828a 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -933,9 +933,18 @@ def _prepare_hyperparameter_tuning( search_params : dict The parameters for the search. The following keys are accepted: - * 'kind': The kind of search algorithm to use e.g.: - 'grid', 'random', 'bayes' or 'optuna'. All valid julearn searchers - can be entered. + * 'kind': The kind of search algorithm to use. Valid options are: + + * ``"grid"`` : :class:`~sklearn.model_selection.GridSearchCV` + * ``"random"`` : + :class:`~sklearn.model_selection.RandomizedSearchCV` + * ``"bayes"`` : :class:`~skopt.BayesSearchCV` + * ``"optuna"`` : + :class:`~optuna_integration.sklearn.OptunaSearchCV` + * user-registered searcher name : see + :func:`~julearn.model_selection.register_searcher` + * ``scikit-learn``-compatible searcher + * 'cv': If search is going to be used, the cross-validation splitting strategy to use. Defaults to same CV as for the model evaluation. 
From 2b469cff31c317ee534e18a6b283c75803fbf5f2 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 17:04:06 +0300 Subject: [PATCH 15/16] Address comments in #262 --- docs/changes/newsfragments/262.doc | 2 +- .../tests/test_available_searchers.py | 31 ++++++++++++------- .../tests/test_optuna_searcher.py | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/docs/changes/newsfragments/262.doc b/docs/changes/newsfragments/262.doc index ce5b75c38..067b3bf0d 100644 --- a/docs/changes/newsfragments/262.doc +++ b/docs/changes/newsfragments/262.doc @@ -1 +1 @@ -Update documentation on Hyperparameter Tuning by `Fede Raimondo_` \ No newline at end of file +Update documentation on Hyperparameter Tuning by `Fede Raimondo`_ \ No newline at end of file diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py index 97764c94f..17386fb91 100644 --- a/julearn/model_selection/tests/test_available_searchers.py +++ b/julearn/model_selection/tests/test_available_searchers.py @@ -51,19 +51,28 @@ def test_reset_searcher() -> None: get_searcher("custom_grid") -def test_get_searcher() -> None: - """Test getting a searcher.""" - out = get_searcher("grid") - assert out == GridSearchCV - - out = get_searcher("random") - assert out == RandomizedSearchCV +@pytest.mark.parametrize( + "searcher,expected", + [ + ("grid", "GridSearchCV"), + ("random", "RandomizedSearchCV"), + ("bayes", "BayesSearchCV"), + ("optuna", "OptunaSearchCV"), + ], +) +def test_get_searcher(searcher: str, expected: str) -> None: + """Test getting a searcher. - out = get_searcher("bayes") - assert out.__name__ == "BayesSearchCV" + Parameters + ---------- + searcher : str + The searcher name. + expected : str + The expected searcher class name. - out = get_searcher("optuna") - assert out.__name__ == "OptunaSearchCV" + """ + out = get_searcher(searcher) + assert out.__name__ == expected @pytest.mark.parametrize( diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py index ab9b7fa11..3d0c30db2 100644 --- a/julearn/model_selection/tests/test_optuna_searcher.py +++ b/julearn/model_selection/tests/test_optuna_searcher.py @@ -72,7 +72,7 @@ def test__prepare_optuna_hyperparameters_distributions( params_to_tune: Dict[str, Dict[str, Any]], expected_types: Tuple, - expected_dist: tuple, + expected_dist: Tuple, ) -> None: """Test the _prepare_optuna_hyperparameters_distributions function. From 721d8e6a3cebd9a34f4396a74017be8eff752597 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 17:04:25 +0300 Subject: [PATCH 16/16] Fix linter --- julearn/model_selection/tests/test_available_searchers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py index 17386fb91..5b3fae4f4 100644 --- a/julearn/model_selection/tests/test_available_searchers.py +++ b/julearn/model_selection/tests/test_available_searchers.py @@ -5,7 +5,7 @@ # License: AGPL import pytest -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV +from sklearn.model_selection import GridSearchCV from julearn.model_selection import ( get_searcher,
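
Taken together, the final helpers convert the julearn tuple convention into searcher-native distribution objects. A quick sketch of the optuna mapping (assumes ``optuna`` and ``optuna-integration`` are installed; the parameter names are illustrative):

    from julearn.model_selection._optuna_searcher import (
        _prepare_optuna_hyperparameters_distributions,
    )

    params = {
        # two floats -> FloatDistribution(0.01, 10.0, log=True)
        "svm__C": (0.01, 10.0, "log-uniform"),
        # two ints -> IntDistribution(2, 4, log=False)
        "select_k__k": (2, 4, "uniform"),
        # -> CategoricalDistribution(("balanced", None))
        "svm__class_weight": ("balanced", None, "categorical"),
    }
    print(_prepare_optuna_hyperparameters_distributions(params))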