From 83ce5bd5e39a38a22f329c87ecb050696879152b Mon Sep 17 00:00:00 2001
From: Fede
Date: Thu, 2 May 2024 19:24:42 +0300
Subject: [PATCH 01/16] Add optuna searcher

---
 julearn/api.py                              |  4 +-
 julearn/conftest.py                         | 25 +++++-
 julearn/model_selection/__init__.py         |  3 +
 julearn/model_selection/_optuna_searcher.py | 76 +++++++++++++++++++
 .../tests/test_available_searchers.py       | 13 ++++
 julearn/pipeline/pipeline_creator.py        | 17 ++++-
 .../pipeline/tests/test_pipeline_creator.py | 43 ++++++++++-
 7 files changed, 175 insertions(+), 6 deletions(-)
 create mode 100644 julearn/model_selection/_optuna_searcher.py

diff --git a/julearn/api.py b/julearn/api.py
index 039254dfc..7602216f1 100644
--- a/julearn/api.py
+++ b/julearn/api.py
@@ -136,8 +136,8 @@ def run_cross_validation(  # noqa: C901
         the following keys:
 
         * 'kind': The kind of search algorithm to use, e.g.:
-          'grid', 'random' or 'bayes'. Can be any valid julearn searcher name
-          or scikit-learn compatible searcher.
+          'grid', 'random', 'bayes' or 'optuna'. Can be any valid julearn
+          searcher name or scikit-learn compatible searcher.
         * 'cv': If a searcher is going to be used, the cross-validation
           splitting strategy to use. Defaults to same CV as for the model
           evaluation.
diff --git a/julearn/conftest.py b/julearn/conftest.py
index 9da707fc4..cb4c2868d 100644
--- a/julearn/conftest.py
+++ b/julearn/conftest.py
@@ -270,7 +270,7 @@ def search_params(request: FixtureRequest) -> Optional[Dict]:
     scope="function",
 )
 def bayes_search_params(request: FixtureRequest) -> Optional[Dict]:
-    """Return different search_params argument for BayesSearchCV. 
+    """Return different search_params argument for BayesSearchCV.
 
     Parameters
     ----------
@@ -286,6 +286,29 @@ def bayes_search_params(request: FixtureRequest) -> Optional[Dict]:
     return request.param
 
 
+@fixture(
+    params=[
+        {"kind": "optuna", "n_trials": 10, "cv": 3},
+        {"kind": "optuna", "timeout": 20},
+    ],
+    scope="function",
+)
+def optuna_search_params(request: FixtureRequest) -> Optional[Dict]:
+    """Return different search_params argument for OptunaSearchCV.
+
+    Parameters
+    ----------
+    request : pytest.FixtureRequest
+        The request object.
+
+    Returns
+    -------
+    dict or None
+        A dictionary with the search_params argument.
+
+    """
+
+    return request.param
 _tuning_params = {
     "zscore": {"with_mean": [True, False]},
diff --git a/julearn/model_selection/__init__.py b/julearn/model_selection/__init__.py
index 2e1eab839..01356d0e6 100644
--- a/julearn/model_selection/__init__.py
+++ b/julearn/model_selection/__init__.py
@@ -17,5 +17,8 @@
 )
 from ._skopt_searcher import register_bayes_searcher
+from ._optuna_searcher import register_optuna_searcher
+
 
 register_bayes_searcher()
+register_optuna_searcher()
diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py
new file mode 100644
index 000000000..a35167480
--- /dev/null
+++ b/julearn/model_selection/_optuna_searcher.py
@@ -0,0 +1,76 @@
+"""Module for registering the OptunaSearchCV class from optuna-integration."""
+
+# Authors: Federico Raimondo
+# License: AGPL
+from typing import Any, Dict
+
+from .available_searchers import _recreate_reset_copy, register_searcher
+
+
+try:
+    from optuna_integration.sklearn import OptunaSearchCV
+    import optuna.distributions as od
+except ImportError:
+    from sklearn.model_selection._search import BaseSearchCV
+
+    class OptunaSearchCV(BaseSearchCV):
+        """Dummy class for OptunaSearchCV that raises ImportError.
+
+        This class is used to raise an ImportError when OptunaSearchCV is
+        requested but optuna and optuna-integration are not installed.
+
+        """
+
+        def __init__(*args, **kwargs):
+            raise ImportError(
+                "OptunaSearchCV requires optuna and optuna-integration to be "
+                "installed."
+            )
+
+
+def register_optuna_searcher():
+    register_searcher("optuna", OptunaSearchCV, "param_distributions")
+
+    # Update the "reset copy" of available searchers
+    _recreate_reset_copy()
+
+
+def _prepare_optuna_hyperparameters_distributions(
+    params_to_tune: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Prepare hyperparameters distributions for RandomizedSearchCV.
+
+    This method replaces tuples with distributions for RandomizedSearchCV
+    following the skopt convention. That is, if a parameter is a tuple
+    with 3 elements, the first two elements are the bounds of the
+    distribution and the third element is the type of distribution.
+
+    Parameters
+    ----------
+    params_to_tune : dict
+        The parameters to tune.
+
+    Returns
+    -------
+    dict
+        The modified parameters to tune.
+
+    """
+    out = {}
+    for k, v in params_to_tune.items():
+        if isinstance(v, tuple) and len(v) == 3:
+            if v[2] == "uniform":
+                if isinstance(v[0], int) and isinstance(v[1], int):
+                    out[k] = od.IntDistribution(v[0], v[1], log=False)
+                else:
+                    out[k] = od.FloatDistribution(v[0], v[1], log=False)
+            elif v[2] in ("loguniform", "log-uniform"):
+                if isinstance(v[0], int) and isinstance(v[1], int):
+                    out[k] = od.IntDistribution(v[0], v[1], log=True)
+                else:
+                    out[k] = od.FloatDistribution(v[0], v[1], log=True)
+            else:
+                out[k] = v
+        else:
+            out[k] = v
+    return out
diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py
index f88348867..7a7d2b913 100644
--- a/julearn/model_selection/tests/test_available_searchers.py
+++ b/julearn/model_selection/tests/test_available_searchers.py
@@ -62,6 +62,9 @@ def test_get_searcher() -> None:
     out = get_searcher("bayes")
     assert out.__name__ == "BayesSearchCV"
 
+    out = get_searcher("optuna")
+    assert out.__name__ == "OptunaSearchCV"
+
 
 def test_get_searcher_params_attr() -> None:
     """Test getting the params attribute of a searcher."""
@@ -74,6 +77,9 @@ def test_get_searcher_params_attr() -> None:
     out = get_searcher_params_attr("bayes")
     assert out == "search_spaces"
 
+    out = get_searcher_params_attr("optuna")
+    assert out == "param_distributions"
+
 
 @pytest.mark.nodeps
 def test_get_searchers_noskopt() -> None:
@@ -81,3 +87,10 @@ def test_get_searchers_noskopt() -> None:
     out = get_searcher("bayes")
     with pytest.raises(ImportError, match="BayesSearchCV requires"):
         out()  # type: ignore
+
+@pytest.mark.nodeps
+def test_get_searchers_nooptuna() -> None:
+    """Test getting a searcher without optuna."""
+    out = get_searcher("optuna")
+    with pytest.raises(ImportError, match="OptunaSearchCV requires"):
+        out()  # type: ignore
diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py
index 652e060d2..0ecb639ec 100644
--- a/julearn/pipeline/pipeline_creator.py
+++ b/julearn/pipeline/pipeline_creator.py
@@ -15,6 +15,9 @@
 
 from ..base import ColumnTypes, ColumnTypesLike, JuTransformer, WrapModel
 from ..model_selection.available_searchers import get_searcher, list_searchers
+from ..model_selection._optuna_searcher import (
+    _prepare_optuna_hyperparameters_distributions
+)
 from ..models import get_model, list_models
 from ..prepare import prepare_search_params
 from ..transformers import (
@@ -928,8 +931,8 @@ def _prepare_hyperparameter_tuning(
The parameters for the search. The following keys are accepted: * 'kind': The kind of search algorithm to use e.g.: - 'grid', 'random' or 'bayes'. All valid julearn searchers can be - entered. + 'grid', 'random', 'bayes' or 'optuna'. All valid julearn searchers + can be entered. * 'cv': If search is going to be used, the cross-validation splitting strategy to use. Defaults to same CV as for the model evaluation. @@ -995,7 +998,17 @@ def _prepare_hyperparameter_tuning( _prepare_hyperparameters_distributions(p) for p in params_to_tune ] + elif search.__name__ == "OptunaSearchCV": + if isinstance(params_to_tune, dict): + params_to_tune = _prepare_optuna_hyperparameters_distributions( + params_to_tune + ) + else: + params_to_tune = [ + _prepare_optuna_hyperparameters_distributions(p) + for p in params_to_tune + ] cv_inner = check_cv(cv_inner) # type: ignore logger.info(f"Using inner CV scheme {cv_inner}") search_params["cv"] = cv_inner diff --git a/julearn/pipeline/tests/test_pipeline_creator.py b/julearn/pipeline/tests/test_pipeline_creator.py index e299b1008..9d049d3ed 100644 --- a/julearn/pipeline/tests/test_pipeline_creator.py +++ b/julearn/pipeline/tests/test_pipeline_creator.py @@ -227,7 +227,7 @@ def test_hyperparameter_tuning_bayes( get_tuning_params: Callable, bayes_search_params: Dict[str, List], ) -> None: - """Test that the pipeline hyperparameter tuning works as expected. + """Test that the pipeline hyperparameter tuning (bayes) works as expected. Parameters ---------- @@ -259,6 +259,47 @@ def test_hyperparameter_tuning_bayes( assert pipeline.search_spaces == param_grid # type: ignore +def test_hyperparameter_tuning_optuna( + X_types_iris: Dict[str, List[str]], # noqa: N803 + model: str, + preprocess: Union[str, List[str]], + problem_type: str, + get_tuning_params: Callable, + optuna_search_params: Dict[str, List], +) -> None: + """Test that the pipeline hyperparameter tuning (optuna) works as expected. + + Parameters + ---------- + X_types_iris : dict + The iris dataset features types. + model : str + The model to test. + preprocess : str or list of str + The preprocessing steps to test. + problem_type : str + The problem type to test. + get_tuning_params : Callable + A function that returns the tuning hyperparameters for a given step. + optuna_search_params : dict of str and list + The parameters for the search. + + """ + optuna_integration = pytest.importorskip("optuna_integration") + OptunaSearchCV = optuna_integration.OptunaSearchCV + + pipeline, param_grid = _hyperparam_tuning_base_test( + X_types_iris, + model, + preprocess, + problem_type, + get_tuning_params, + optuna_search_params, + ) + assert isinstance(pipeline, OptunaSearchCV) + assert pipeline.param_distributions == param_grid # type: ignore + + def _compare_param_grids(a: Dict, b: Dict) -> None: """Compare two param grids. 
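With the searcher registered, selecting it from user code only needs the new ``kind``. A minimal sketch of the intended usage (the DataFrame ``df``, its column names and the trial count are illustrative, mirroring the ``optuna_search_params`` fixture above):

    from julearn import run_cross_validation
    from julearn.pipeline import PipelineCreator

    # Tune C over a log-uniform range; the tuple convention is converted
    # into an optuna distribution by
    # _prepare_optuna_hyperparameters_distributions.
    creator = PipelineCreator(problem_type="classification")
    creator.add("zscore")
    creator.add("svm", C=(0.01, 10, "log-uniform"))

    scores, model = run_cross_validation(
        X=["feature_a", "feature_b"],  # illustrative column names
        y="target",
        data=df,  # assumed: a pandas DataFrame with features and target
        model=creator,
        return_estimator="all",
        search_params={"kind": "optuna", "n_trials": 10, "cv": 3},
    )
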
From 030a439e40fdc14c2555ab136fd25e9f925c6532 Mon Sep 17 00:00:00 2001 From: Fede Date: Thu, 2 May 2024 19:26:59 +0300 Subject: [PATCH 02/16] Fix linters --- julearn/model_selection/_optuna_searcher.py | 10 +++++----- julearn/pipeline/pipeline_creator.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py index a35167480..2a1619871 100644 --- a/julearn/model_selection/_optuna_searcher.py +++ b/julearn/model_selection/_optuna_searcher.py @@ -8,8 +8,8 @@ try: + import optuna.distributions as optd from optuna_integration.sklearn import OptunaSearchCV - import optuna.distributions as od except ImportError: from sklearn.model_selection._search import BaseSearchCV @@ -61,14 +61,14 @@ def _prepare_optuna_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: if v[2] == "uniform": if isinstance(v[0], int) and isinstance(v[1], int): - out[k] = od.IntDistribution(v[0], v[1], log=False) + out[k] = optd.IntDistribution(v[0], v[1], log=False) else: - out[k] = od.FloatDistribution(v[0], v[1], log=False) + out[k] = optd.FloatDistribution(v[0], v[1], log=False) elif v[2] in ("loguniform", "log-uniform"): if isinstance(v[0], int) and isinstance(v[1], int): - out[k] = od.IntDistribution(v[0], v[1], log=True) + out[k] = optd.IntDistribution(v[0], v[1], log=True) else: - out[k] = od.FloatDistribution(v[0], v[1], log=True) + out[k] = optd.FloatDistribution(v[0], v[1], log=True) else: out[k] = v else: diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index 0ecb639ec..d98c5b47f 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -14,10 +14,10 @@ from sklearn.pipeline import Pipeline from ..base import ColumnTypes, ColumnTypesLike, JuTransformer, WrapModel -from ..model_selection.available_searchers import get_searcher, list_searchers from ..model_selection._optuna_searcher import ( - _prepare_optuna_hyperparameters_distributions + _prepare_optuna_hyperparameters_distributions, ) +from ..model_selection.available_searchers import get_searcher, list_searchers from ..models import get_model, list_models from ..prepare import prepare_search_params from ..transformers import ( From ac0471a8f94f6122cb152c4b77e56cb4ec128797 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:37:48 +0300 Subject: [PATCH 03/16] Add Optuna + distributions doc --- examples/99_docs/run_hyperparameters_docs.py | 142 ++++++++++++++++++- 1 file changed, 140 insertions(+), 2 deletions(-) diff --git a/examples/99_docs/run_hyperparameters_docs.py b/examples/99_docs/run_hyperparameters_docs.py index 4b7372abb..2213bc5a2 100644 --- a/examples/99_docs/run_hyperparameters_docs.py +++ b/examples/99_docs/run_hyperparameters_docs.py @@ -253,8 +253,9 @@ # hyperparameters values. # # Other searchers that ``julearn`` provides are the -# :class:`~sklearn.model_selection.RandomizedSearchCV` and -# :class:`~skopt.BayesSearchCV`. +# :class:`~sklearn.model_selection.RandomizedSearchCV`, +# :class:`~skopt.BayesSearchCV` and +# :class:`~optuna.integration.sklearn.OptunaSearchCV`. # # The randomized searcher # (:class:`~sklearn.model_selection.RandomizedSearchCV`) is similar to the @@ -274,6 +275,12 @@ # :class:`~skopt.BayesSearchCV` documentation, including how to specify # the prior distributions of the hyperparameters. 
 #
+# The Optuna searcher (:class:`~optuna.integration.sklearn.OptunaSearchCV`)\
+# uses the Optuna library to find the best hyperparameter set. Optuna is a
+# hyperparameter optimization framework that has several algorithms to find
+# the best hyperparameter set. For more information, see the
+# `Optuna`_ documentation.
+#
 # We can specify the kind of searcher and its parametrization, by setting the
 # ``search_params`` parameter in the :func:`.run_cross_validation` function.
 # For example, we can use the
@@ -369,6 +376,137 @@
 )
 pprint(model_tuned.best_params_)
 
+###############################################################################
+# An example using the optuna searcher is shown below. The searcher is
+# specified as ``"optuna"`` and the hyperparameters are given as a dictionary
+# with the hyperparameters to tune and their distributions, just as for the
+# bayesian searcher. However, the optuna searcher behaviour is controlled by a
+# :class:`~optuna.study.Study` object. This object can be passed to the
+# searcher using the ``study`` parameter in the ``search_params`` dictionary.
+#
+# .. important::
+#     The optuna searcher requires that all the hyperparameters are specified
+#     as distributions, even the categorical ones.
+#
+# We first modify the pipeline creator so the ``select_k`` parameter is
+# specified as a distribution. As an example, we use a categorical
+# distribution for the ``class_weight`` hyperparameter, trying the
+# ``"balanced"`` and ``None`` values.
+
+creator = PipelineCreator(problem_type="classification")
+creator.add("zscore")
+creator.add("select_k", k=(2, 4, "uniform"))
+creator.add(
+    "svm",
+    C=(0.01, 10, "log-uniform"),
+    gamma=(1e-3, 1e-1, "log-uniform"),
+    class_weight=("balanced", None, "categorical")
+)
+
+###############################################################################
+# We can now use the optuna searcher with 10 trials and 3-fold cross-validation.
+
+import optuna
+
+study = optuna.create_study(
+    direction="maximize",
+    study_name="optuna-concept",
+    load_if_exists=True,
+)
+
+search_params = {
+    "kind": "optuna",
+    "study": study,
+    "cv": 3,
+}
+scores_tuned, model_tuned = run_cross_validation(
+    X=X,
+    y=y,
+    data=df,
+    X_types=X_types,
+    model=creator,
+    return_estimator="all",
+    search_params=search_params,
+)
+
+print(
+    "Scores with best hyperparameter using 10 iterations of "
+    f"optuna and 3-fold CV: {scores_tuned['test_score'].mean()}"
+)
+pprint(model_tuned.best_params_)
+
+###############################################################################
+#
+# Specifying distributions
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The hyperparameters can be specified as distributions for the randomized
+# searcher, bayesian searcher and optuna searcher. The distributions can be
+# specified either using a toolbox-specific method or using a tuple
+# convention with the following format: ``(low, high, distribution)`` where
+# the distribution can be either ``"log-uniform"`` or ``"uniform"``, or
+# ``(a, b, c, d, ..., "categorical")`` where ``a``, ``b``, ``c``, ``d``, etc.
+# are the possible categorical values for the hyperparameter.
+#
+# For example, we can specify the ``C`` and ``gamma`` hyperparameters of the
+# :class:`~sklearn.svm.SVC` as log-uniform distributions, while keeping
+# the ``with_mean`` parameter of the
+# :class:`~sklearn.preprocessing.StandardScaler` as a categorical parameter
+# with two options.
+ + +creator = PipelineCreator(problem_type="classification") +creator.add("zscore", with_mean=(True, False, "categorical")) +creator.add( + "svm", + C=(0.01, 10, "log-uniform"), + gamma=(1e-3, 1e-1, "log-uniform"), +) + +############################################################################### +# While this will work for any of the ``random``, ``bayes`` or ``optuna`` +# searcher options, it is important to note that both ``bayes`` and ``optuna`` +# searchers accept further parameters to specify distributions. For example, +# the ``bayes`` searcher distributions are defined using the +# :class:`~skopt.space.space.Categorical`, :class:`~skopt.space.space.Integer` +# and :class:`~skopt.space.space.Real`. +# +# For example, we can define a log-uniform distribution with base 2 for the +# ``C`` hyperparameter of the :class:`~sklearn.svm.SVC` model: +from skopt.space import Real +creator = PipelineCreator(problem_type="classification") +creator.add("zscore", with_mean=(True, False, "categorical")) +creator.add( + "svm", + C=Real(0.01, 10, prior="log-uniform", base=2), + gamma=(1e-3, 1e-1, "log-uniform"), +) + +############################################################################### +# For the optuna searcher, the distributions are defined using the +# :class:`~optuna.distributions.CategoricalDistribution`, +# :class:`~optuna.distributions.FloatDistribution` and +# :class:`~optuna.distributions.IntDistribution`. +# +# +# For example, we can define a uniform distribution from 0.5 to 0.9 with a 0.05 +# step for the ``n_components`` of a :class:`~sklearn.decomposition.PCA` +# transformer, while keeping a log-uniform distribution for the ``C`` and +# ``gamma`` hyperparameters of the :class:`~sklearn.svm.SVC` model. +from optuna.distributions import FloatDistribution +creator = PipelineCreator(problem_type="classification") +creator.add("zscore") +creator.add( + "pca", + n_components=FloatDistribution(0.5, 0.9, step=0.05), +) +creator.add( + "svm", + C=FloatDistribution(0.01, 10, log=True), + gamma=(1e-3, 1e-1, "log-uniform"), +) + + ############################################################################### # From afa1d27013883cde98e82f5715dd9dd2baadf138 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:38:04 +0300 Subject: [PATCH 04/16] Configure tests + deps --- pyproject.toml | 4 ++++ tox.ini | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 320d256d0..1ab96f1ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,10 @@ viz = [ ] skopt = ["scikit-optimize>=0.10.0,<0.11"] +optuna = [ + "optuna>=3.6.0,<3.7", + "optuna_integration>=3.6.0,<3.7", +] # Add all optional functional dependencies (skip deslib until its fixed) # This does not include dev/docs building dependencies diff --git a/tox.ini b/tox.ini index 017e63fcf..1ea44e360 100644 --- a/tox.ini +++ b/tox.ini @@ -15,6 +15,8 @@ deps = pytest seaborn scikit-optimize>=0.10.0,<0.11 + optuna>=3.6.0,<3.7 + optuna_integration>=3.6.0,<3.7 commands = pytest {toxinidir}/julearn @@ -42,6 +44,8 @@ deps = bokeh>=3.0.0 param scikit-optimize>=0.10.0,<0.11 + optuna>=3.6.0,<3.7 + optuna_integration>=3.6.0,<3.7 commands = pytest -vv {toxinidir}/julearn @@ -64,6 +68,8 @@ deps = bokeh>=3.0.0 param scikit-optimize>=0.10.0,<0.11 + optuna>=3.6.0,<3.7 + optuna_integration>=3.6.0,<3.7 commands = pytest --cov={envsitepackagesdir}/julearn --cov=./julearn --cov-report=xml --cov-report=term -vv From f7fc6fe8475659530475539d0e4c08470d982a36 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 
2024 12:38:15 +0300 Subject: [PATCH 05/16] Add optuna links in docs --- docs/conf.py | 1 + docs/links.inc | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index da1a0edbb..095c89843 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -161,6 +161,7 @@ "joblib": ("https://joblib.readthedocs.io/en/latest/", None), "scipy": ("https://docs.scipy.org/doc/scipy/", None), "skopt": ("https://scikit-optimize.readthedocs.io/en/latest", None), + "optuna": ("https://optuna.readthedocs.io/en/stable", None), } diff --git a/docs/links.inc b/docs/links.inc index c4be5e9c0..654f50fb2 100644 --- a/docs/links.inc +++ b/docs/links.inc @@ -41,3 +41,4 @@ .. _`DESlib`: https://github.com/scikit-learn-contrib/DESlib .. _`scikit-optimize`: https://scikit-optimize.readthedocs.io/en/stable/ +.. _`Optuna`: https://optuna.org \ No newline at end of file From b72335cd52bc21329baba20ad393ae4c219ac899 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:38:30 +0300 Subject: [PATCH 06/16] Add optuna searcher --- julearn/model_selection/_optuna_searcher.py | 38 +++- .../tests/test_optuna_searcher.py | 163 ++++++++++++++++++ julearn/pipeline/pipeline_creator.py | 16 +- .../pipeline/tests/test_pipeline_creator.py | 2 +- 4 files changed, 212 insertions(+), 7 deletions(-) create mode 100644 julearn/model_selection/tests/test_optuna_searcher.py diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py index 2a1619871..04ba9bac4 100644 --- a/julearn/model_selection/_optuna_searcher.py +++ b/julearn/model_selection/_optuna_searcher.py @@ -4,6 +4,7 @@ # License: AGPL from typing import Any, Dict +from ..utils import logger from .available_searchers import _recreate_reset_copy, register_searcher @@ -38,12 +39,14 @@ def register_optuna_searcher(): def _prepare_optuna_hyperparameters_distributions( params_to_tune: Dict[str, Any], ) -> Dict[str, Any]: - """Prepare hyperparameters distributions for RandomizedSearchCV. + """Prepare hyperparameters distributions for OptunaSearchCV. - This method replaces tuples with distributions for RandomizedSearchCV + This method replaces tuples with distributions for OptunaSearchCV following the skopt convention. That is, if a parameter is a tuple with 3 elements, the first two elements are the bounds of the - distribution and the third element is the type of distribution. + distribution and the third element is the type of distribution. In case + the last element is "categorical", the parameter is considered + categorical and all the previous elements are the choices. 
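+
+    For example, ``(1, 10, "uniform")`` (two ints) maps to an
+    ``IntDistribution``, ``(0.01, 10.0, "log-uniform")`` maps to a
+    log-scaled ``FloatDistribution`` and ``("a", "b", "categorical")``
+    maps to a ``CategoricalDistribution`` over ``("a", "b")``.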
Parameters ---------- @@ -61,16 +64,43 @@ def _prepare_optuna_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: if v[2] == "uniform": if isinstance(v[0], int) and isinstance(v[1], int): + logger.info( + f"Hyperparameter {k} is uniform integer " + f"[{v[0]}, {v[1]}]" + ) out[k] = optd.IntDistribution(v[0], v[1], log=False) else: + logger.info( + f"Hyperparameter {k} is uniform float [{v[0]}, {v[1]}]" + ) out[k] = optd.FloatDistribution(v[0], v[1], log=False) - elif v[2] in ("loguniform", "log-uniform"): + elif v[2] == "log-uniform": if isinstance(v[0], int) and isinstance(v[1], int): + logger.info( + f"Hyperparameter {k} is log-uniform int " + f"[{v[0]}, {v[1]}]" + ) out[k] = optd.IntDistribution(v[0], v[1], log=True) else: + logger.info( + f"Hyperparameter {k} is log-uniform float " + f"[{v[0]}, {v[1]}]" + ) out[k] = optd.FloatDistribution(v[0], v[1], log=True) + elif v[2] == "categorical": + logger.info(f"Hyperparameter {k} is categorical with 2 " + f"options: [{v[0]} and {v[1]}]") + out[k] = optd.CategoricalDistribution((v[0], v[1])) else: out[k] = v + elif ( + isinstance(v, tuple) + and isinstance(v[-1], str) + and v[-1] == "categorical" + ): + logger.info(f"Hyperparameter {k} is categorical [{v[:-1]}]") + out[k] = optd.CategoricalDistribution(v[:-1]) else: + logger.info(f"Hyperparameter {k} as is {v}") out[k] = v return out diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py new file mode 100644 index 000000000..10f52cdee --- /dev/null +++ b/julearn/model_selection/tests/test_optuna_searcher.py @@ -0,0 +1,163 @@ +"""Provides tests for the optuna searcher.""" + +# Authors: Federico Raimondo +# License: AGPL +from typing import Dict + +import optuna.distributions as optd +import pytest + +from julearn.model_selection._optuna_searcher import ( + _prepare_optuna_hyperparameters_distributions, +) + + +@pytest.mark.parametrize( + "params_to_tune,expected_types, expected_dist", + [ + ( + { + "n_components": (0.2, 0.7, "uniform"), + "n_neighbors": (1.0, 10.0, "log-uniform"), + }, + ("float", "float"), + ("uniform", "log-uniform"), + ), + ( + { + "n_components": (1, 20, "uniform"), + "n_neighbors": (1, 10, "log-uniform"), + }, + ("int", "int"), + ("uniform", "log-uniform"), + ), + ( + { + "options": (True, False, "categorical"), + "more_options": ("a", "b", "c", "d", "categorical"), + }, + (None, None), + ("categorical", "categorical"), + ), + ( + { + "n_components": optd.FloatDistribution(0.2, 0.7, log=False), + "n_neighbors": optd.FloatDistribution(1.0, 10.0, log=True), + }, + ("float", "float"), + ("uniform", "log-uniform"), + ), + ( + { + "n_components": optd.IntDistribution(1, 20, log=False), + "n_neighbors": optd.IntDistribution(1, 10, log=True), + }, + ("int", "int"), + ("uniform", "log-uniform"), + ), + ( + { + "options": optd.CategoricalDistribution([True, False]), + "more_options": optd.CategoricalDistribution( + ("a", "b", "c", "d"), + ), + }, + (None, None), + ("categorical", "categorical"), + ), + ], +) +def test__prepare_optuna_hyperparameters_distributions( + params_to_tune: Dict[str, Dict[str, tuple]], + expected_types: tuple, + expected_dist: tuple, +) -> None: + """Test the _prepare_optuna_hyperparameters_distributions function. + + Parameters + ---------- + params_to_tune : dict + The parameters to tune. + expected_types : tuple + The expected types of each parameter. + expected_dist : tuple + The expected distributions of each parameter. 
+ + """ + new_params = _prepare_optuna_hyperparameters_distributions(params_to_tune) + for i, (k, v) in enumerate(new_params.items()): + if expected_dist[i] == "uniform": + if expected_types[i] == "int": + assert isinstance(v, optd.IntDistribution) + assert not v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], optd.IntDistribution) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert not params_to_tune[k].log # type: ignore + else: + assert isinstance(v, optd.FloatDistribution) + assert not v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance( + params_to_tune[k], optd.FloatDistribution + ) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert not params_to_tune[k].log # type: ignore + elif expected_dist[i] == "log-uniform": + if expected_types[i] == "int": + assert isinstance(v, optd.IntDistribution) + assert v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], optd.IntDistribution) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert params_to_tune[k].log # type: ignore + else: + assert isinstance(v, optd.FloatDistribution) + assert v.log + if isinstance(params_to_tune[k], tuple): + assert v.low == params_to_tune[k][0] # type: ignore + assert v.high == params_to_tune[k][1] # type: ignore + else: + assert isinstance( + params_to_tune[k], optd.FloatDistribution + ) + assert v.low == params_to_tune[k].low # type: ignore + assert v.high == params_to_tune[k].high # type: ignore + assert params_to_tune[k].log # type: ignore + elif expected_dist[i] == "categorical": + assert isinstance(v, optd.CategoricalDistribution) + if isinstance(params_to_tune[k], tuple): + assert all( + x in v.choices + for x in params_to_tune[k][:-1] # type: ignore + ) + assert all( + x in params_to_tune[k][:-1] # type: ignore + for x in v.choices + ) + else: + assert isinstance( + params_to_tune[k], optd.CategoricalDistribution + ) + assert all( + x in v.choices + for x in params_to_tune[k].choices # type: ignore + ) + assert all( + x in params_to_tune[k].choices # type: ignore + for x in v.choices + ) + else: + pytest.fail("Invalid distribution type") diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index d98c5b47f..d5616e255 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -17,6 +17,9 @@ from ..model_selection._optuna_searcher import ( _prepare_optuna_hyperparameters_distributions, ) +from ..model_selection._skopt_searcher import ( + _prepare_skopt_hyperparameters_distributions, +) from ..model_selection.available_searchers import get_searcher, list_searchers from ..models import get_model, list_models from ..prepare import prepare_search_params @@ -901,7 +904,7 @@ def _prepare_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: if v[2] == "uniform": mod_params_to_tune[k] = stats.uniform(v[0], v[1]) - elif v[2] in ("loguniform", "log-uniform"): + elif v[2] == "log-uniform": 
                 mod_params_to_tune[k] = stats.loguniform(v[0], v[1])
             else:
                 mod_params_to_tune[k] = v
@@ -998,8 +1001,17 @@ def _prepare_hyperparameter_tuning(
                     _prepare_hyperparameters_distributions(p)
                     for p in params_to_tune
                 ]
+        elif search.__name__ == "BayesSearchCV":
+            if isinstance(params_to_tune, dict):
+                params_to_tune = _prepare_skopt_hyperparameters_distributions(
+                    params_to_tune
+                )
+            else:
+                params_to_tune = [
+                    _prepare_skopt_hyperparameters_distributions(p)
+                    for p in params_to_tune
+                ]
         elif search.__name__ == "OptunaSearchCV":
-
             if isinstance(params_to_tune, dict):
                 params_to_tune = _prepare_optuna_hyperparameters_distributions(
                     params_to_tune
diff --git a/julearn/pipeline/tests/test_pipeline_creator.py b/julearn/pipeline/tests/test_pipeline_creator.py
index 9d049d3ed..746646f14 100644
--- a/julearn/pipeline/tests/test_pipeline_creator.py
+++ b/julearn/pipeline/tests/test_pipeline_creator.py
@@ -321,7 +321,7 @@ def _compare_param_grids(a: Dict, b: Dict) -> None:
         if hasattr(val, "rvs"):
             assert val.args[0] == b[key][0]
             assert val.args[1] == b[key][1]
-            if b[key][2] in ["log-uniform", "loguniform"]:
+            if b[key][2] == "log-uniform":
                 assert val.dist.name == "loguniform"
             elif b[key][2] == "uniform":
                 assert val.dist.name == "uniform"

From a8c58615a654be66018075bcc2c6de21815e6dd9 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 12:38:43 +0300
Subject: [PATCH 07/16] Modify skopt searcher distributions

---
 julearn/model_selection/_skopt_searcher.py |  62 ++++++++
 .../tests/test_skopt_searcher.py           | 133 ++++++++++++++++++
 2 files changed, 195 insertions(+)
 create mode 100644 julearn/model_selection/tests/test_skopt_searcher.py

diff --git a/julearn/model_selection/_skopt_searcher.py b/julearn/model_selection/_skopt_searcher.py
index e0904b10c..3bd246a3b 100644
--- a/julearn/model_selection/_skopt_searcher.py
+++ b/julearn/model_selection/_skopt_searcher.py
@@ -2,11 +2,14 @@
 
 # Authors: Federico Raimondo
 # License: AGPL
+from typing import Any, Dict
 
+from ..utils import logger
 from .available_searchers import _recreate_reset_copy, register_searcher
 
 
 try:
+    import skopt.space as sksp
     from skopt import BayesSearchCV
 except ImportError:
     from sklearn.model_selection._search import BaseSearchCV
@@ -30,3 +33,62 @@ def register_bayes_searcher():
 
     # Update the "reset copy" of available searchers
     _recreate_reset_copy()
+
+
+
+def _prepare_skopt_hyperparameters_distributions(
+    params_to_tune: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Prepare hyperparameters distributions for BayesSearchCV.
+
+    This method replaces tuples with distributions for BayesSearchCV
+    following the skopt convention. That is, if a parameter is a tuple
+    with 3 elements, the first two elements are the bounds of the
+    distribution and the third element is the type of distribution. In case
+    the last element is "categorical", the parameter is considered
+    categorical and all the previous elements are the choices.
+
+    Parameters
+    ----------
+    params_to_tune : dict
+        The parameters to tune.
+
+    Returns
+    -------
+    dict
+        The modified parameters to tune.
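+
+    For example, ``(1, 20, "uniform")`` becomes
+    ``Integer(1, 20, prior="uniform")``, ``(0.01, 10.0, "log-uniform")``
+    becomes ``Real(0.01, 10.0, prior="log-uniform")`` and
+    ``("a", "b", "categorical")`` becomes ``Categorical(("a", "b"))``.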
+
+    """
+    out = {}
+    for k, v in params_to_tune.items():
+        if isinstance(v, tuple) and len(v) == 3:
+            prior = v[2]
+            if prior == "categorical":
+                logger.info(f"Hyperparameter {k} is categorical with 2 "
+                            f"options: [{v[0]} and {v[1]}]")
+                out[k] = sksp.Categorical(v[:-1])
+            elif isinstance(v[0], int) and isinstance(v[1], int):
+                logger.info(
+                    f"Hyperparameter {k} is {prior} integer "
+                    f"[{v[0]}, {v[1]}]"
+                )
+                out[k] = sksp.Integer(v[0], v[1], prior=prior)
+            elif isinstance(v[0], float) and isinstance(v[1], float):
+                logger.info(
+                    f"Hyperparameter {k} is {prior} float "
+                    f"[{v[0]}, {v[1]}]"
+                )
+                out[k] = sksp.Real(v[0], v[1], prior=prior)
+            else:
+                logger.info(f"Hyperparameter {k} as is {v}")
+                out[k] = v
+        elif (
+            isinstance(v, tuple)
+            and isinstance(v[-1], str)
+            and v[-1] == "categorical"
+        ):
+            out[k] = sksp.Categorical(v[:-1])
+        else:
+            logger.info(f"Hyperparameter {k} as is {v}")
+            out[k] = v
+    return out
diff --git a/julearn/model_selection/tests/test_skopt_searcher.py b/julearn/model_selection/tests/test_skopt_searcher.py
new file mode 100644
index 000000000..9e8c67fb0
--- /dev/null
+++ b/julearn/model_selection/tests/test_skopt_searcher.py
@@ -0,0 +1,133 @@
+"""Provides tests for the bayes searcher."""
+
+# Authors: Federico Raimondo
+# License: AGPL
+from typing import Dict
+
+import pytest
+import skopt.space as sksp
+
+from julearn.model_selection._skopt_searcher import (
+    _prepare_skopt_hyperparameters_distributions,
+)
+
+
+@pytest.mark.parametrize(
+    "params_to_tune,expected_types, expected_dist",
+    [
+        (
+            {
+                "n_components": (0.2, 0.7, "uniform"),
+                "n_neighbors": (1.0, 10.0, "log-uniform"),
+            },
+            ("float", "float"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "n_components": (1, 20, "uniform"),
+                "n_neighbors": (1, 10, "log-uniform"),
+            },
+            ("int", "int"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "options": (True, False, "categorical"),
+                "more_options": ("a", "b", "c", "d", "categorical"),
+            },
+            (None, None),
+            ("categorical", "categorical"),
+        ),
+        (
+            {
+                "n_components": sksp.Real(0.2, 0.7, prior="uniform"),
+                "n_neighbors": sksp.Real(1.0, 10.0, prior="log-uniform"),
+            },
+            ("float", "float"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "n_components": sksp.Integer(1, 20, prior="uniform"),
+                "n_neighbors": sksp.Integer(1, 10, prior="log-uniform"),
+            },
+            ("int", "int"),
+            ("uniform", "log-uniform"),
+        ),
+        (
+            {
+                "options": sksp.Categorical([True, False]),
+                "more_options": sksp.Categorical(
+                    ("a", "b", "c", "d"),
+                ),
+            },
+            (None, None),
+            ("categorical", "categorical"),
+        ),
+    ],
+)
+def test__prepare_skopt_hyperparameters_distributions(
+    params_to_tune: Dict[str, Dict[str, tuple]],
+    expected_types: tuple,
+    expected_dist: tuple,
+) -> None:
+    """Test the _prepare_skopt_hyperparameters_distributions function.
+
+    Parameters
+    ----------
+    params_to_tune : dict
+        The parameters to tune.
+    expected_types : tuple
+        The expected types of each parameter.
+    expected_dist : tuple
+        The expected distributions of each parameter.
+ + """ + new_params = _prepare_skopt_hyperparameters_distributions(params_to_tune) + for i, (k, v) in enumerate(new_params.items()): + if expected_types[i] == "int": + assert isinstance(v, sksp.Integer) + assert v.prior == expected_dist[i] + if isinstance(params_to_tune[k], tuple): + assert v.bounds[0] == params_to_tune[k][0] # type: ignore + assert v.bounds[1] == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], sksp.Integer) + assert v.bounds[0] == params_to_tune[k].bounds[0] # type: ignore + assert v.bounds[1] == params_to_tune[k].bounds[1] # type: ignore + assert params_to_tune[k].prior == v.prior # type: ignore + elif expected_types[i] == "float": + assert isinstance(v, sksp.Real) + assert v.prior == expected_dist[i] + if isinstance(params_to_tune[k], tuple): + assert v.bounds[0] == params_to_tune[k][0] # type: ignore + assert v.bounds[1] == params_to_tune[k][1] # type: ignore + else: + assert isinstance(params_to_tune[k], sksp.Real) + assert v.bounds[0] == params_to_tune[k].bounds[0] # type: ignore + assert v.bounds[1] == params_to_tune[k].bounds[1] # type: ignore + assert params_to_tune[k].prior == v.prior # type: ignore + elif expected_dist[i] == "categorical": + assert isinstance(v, sksp.Categorical) + if isinstance(params_to_tune[k], tuple): + assert all( + x in v.categories + for x in params_to_tune[k][:-1] # type: ignore + ) + assert all( + x in params_to_tune[k][:-1] # type: ignore + for x in v.categories + ) + else: + assert isinstance(params_to_tune[k], sksp.Categorical) + assert all( + x in v.categories + for x in params_to_tune[k].categories # type: ignore + ) + assert all( + x in params_to_tune[k].categories # type: ignore + for x in v.categories + ) + else: + pytest.fail("Invalid distribution type") From 877b0ad818a0082d851a4fb44cb2a4fa8425c5b1 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:41:35 +0300 Subject: [PATCH 08/16] Add changes for #262 --- docs/changes/newsfragments/262.enh | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/newsfragments/262.enh diff --git a/docs/changes/newsfragments/262.enh b/docs/changes/newsfragments/262.enh new file mode 100644 index 000000000..a42d5bc5c --- /dev/null +++ b/docs/changes/newsfragments/262.enh @@ -0,0 +1 @@ +Add :class:`~optuna.integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_ From 195ac71209cd385177ae8fdb1ca98fa551ac7b2e Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 12:57:16 +0300 Subject: [PATCH 09/16] Update dependencies for optuna --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1ab96f1ed..56e1f455e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,8 @@ docs = [ "numpydoc>=1.5.0,<1.6", "towncrier<24", "scikit-optimize>=0.10.0,<0.11", + "optuna>=3.6.0,<3.7", + "optuna_integration>=3.6.0,<3.7", ] deslib = ["deslib>=0.3.5,<0.4"] viz = [ @@ -76,7 +78,7 @@ optuna = [ # Add all optional functional dependencies (skip deslib until its fixed) # This does not include dev/docs building dependencies -all = ["julearn[viz,skopt]"] +all = ["julearn[viz,skopt,optuna]"] ################ # Tool configs # From f0c9ade7a62a4161f03da0c6ec6fdaed02ad2108 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 13:29:01 +0300 Subject: [PATCH 10/16] Fix intersphinx for optuna_integration --- 
docs/changes/newsfragments/262.enh | 2 +- docs/conf.py | 4 ++++ examples/99_docs/run_hyperparameters_docs.py | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/changes/newsfragments/262.enh b/docs/changes/newsfragments/262.enh index a42d5bc5c..d512a7673 100644 --- a/docs/changes/newsfragments/262.enh +++ b/docs/changes/newsfragments/262.enh @@ -1 +1 @@ -Add :class:`~optuna.integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_ +Add :class:`~optuna_integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_ diff --git a/docs/conf.py b/docs/conf.py index 095c89843..28d256772 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -162,6 +162,10 @@ "scipy": ("https://docs.scipy.org/doc/scipy/", None), "skopt": ("https://scikit-optimize.readthedocs.io/en/latest", None), "optuna": ("https://optuna.readthedocs.io/en/stable", None), + "optuna_integration": ( + "https://optuna-integration.readthedocs.io/en/stable", + None, + ), } diff --git a/examples/99_docs/run_hyperparameters_docs.py b/examples/99_docs/run_hyperparameters_docs.py index 2213bc5a2..b7dfff7ac 100644 --- a/examples/99_docs/run_hyperparameters_docs.py +++ b/examples/99_docs/run_hyperparameters_docs.py @@ -255,7 +255,7 @@ # Other searchers that ``julearn`` provides are the # :class:`~sklearn.model_selection.RandomizedSearchCV`, # :class:`~skopt.BayesSearchCV` and -# :class:`~optuna.integration.sklearn.OptunaSearchCV`. +# :class:`~optuna_integration.sklearn.OptunaSearchCV`. # # The randomized searcher # (:class:`~sklearn.model_selection.RandomizedSearchCV`) is similar to the @@ -275,7 +275,7 @@ # :class:`~skopt.BayesSearchCV` documentation, including how to specify # the prior distributions of the hyperparameters. # -# The Optuna searcher (:class:`~optuna.integration.sklearn.OptunaSearchCV`)\ +# The Optuna searcher (:class:`~optuna_integration.sklearn.OptunaSearchCV`)\ # uses the Optuna library to find the best hyperparameter set. Optuna is a # hyperparameter optimization framework that has several algorithms to find # the best hyperparameter set. 
For more information, see the

From 67eadb7ad5ceffbd6c39339492488435f67aea7c Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 13:45:02 +0300
Subject: [PATCH 11/16] Skip test when deps are not met

---
 julearn/model_selection/tests/test_optuna_searcher.py | 3 ++-
 julearn/model_selection/tests/test_skopt_searcher.py  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py
index 10f52cdee..0e0014a83 100644
--- a/julearn/model_selection/tests/test_optuna_searcher.py
+++ b/julearn/model_selection/tests/test_optuna_searcher.py
@@ -4,7 +4,6 @@
 # License: AGPL
 from typing import Dict
 
-import optuna.distributions as optd
 import pytest
 
 from julearn.model_selection._optuna_searcher import (
@@ -12,6 +11,8 @@
 )
 
 
+optd = pytest.importorskip("optuna.distributions")
+
 @pytest.mark.parametrize(
     "params_to_tune,expected_types, expected_dist",
     [
diff --git a/julearn/model_selection/tests/test_skopt_searcher.py b/julearn/model_selection/tests/test_skopt_searcher.py
index 9e8c67fb0..be9a9321b 100644
--- a/julearn/model_selection/tests/test_skopt_searcher.py
+++ b/julearn/model_selection/tests/test_skopt_searcher.py
@@ -5,13 +5,14 @@
 from typing import Dict
 
 import pytest
-import skopt.space as sksp
 
 from julearn.model_selection._skopt_searcher import (
     _prepare_skopt_hyperparameters_distributions,
 )
 
+sksp = pytest.importorskip("skopt.space")
+
 
 @pytest.mark.parametrize(

From 365b615fbfa8bd2806eddd24916e655569a21668 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 16:18:11 +0300
Subject: [PATCH 12/16] Update changelog for #262

---
 docs/changes/newsfragments/262.doc     | 1 +
 docs/changes/newsfragments/262.enh     | 2 +-
 docs/changes/newsfragments/262.feature | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 docs/changes/newsfragments/262.doc
 create mode 100644 docs/changes/newsfragments/262.feature

diff --git a/docs/changes/newsfragments/262.doc b/docs/changes/newsfragments/262.doc
new file mode 100644
index 000000000..ce5b75c38
--- /dev/null
+++ b/docs/changes/newsfragments/262.doc
@@ -0,0 +1 @@
+Update documentation on Hyperparameter Tuning by `Fede Raimondo_`
\ No newline at end of file
diff --git a/docs/changes/newsfragments/262.enh b/docs/changes/newsfragments/262.enh
index d512a7673..dc5ac2746 100644
--- a/docs/changes/newsfragments/262.enh
+++ b/docs/changes/newsfragments/262.enh
@@ -1 +1 @@
-Add :class:`~optuna_integration.sklearn.OptunaSearchCV` to the list of available searchers as 'optuna', update documentation on Hyperparameter tuning and refactor how hyperparmeters' distributions are specified by `Fede Raimondo`_
+Refactor how hyperparameters' distributions are specified by `Fede Raimondo`_
diff --git a/docs/changes/newsfragments/262.feature b/docs/changes/newsfragments/262.feature
new file mode 100644
index 000000000..16be6fbf6
--- /dev/null
+++ b/docs/changes/newsfragments/262.feature
@@ -0,0 +1 @@
+Add :class:`~optuna_integration.sklearn.OptunaSearchCV` to the list of available searchers as ``optuna`` by `Fede Raimondo`_

From 2caf4c3281efe2ca76033d3d0478cd0147555a44 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 16:18:26 +0300
Subject: [PATCH 13/16] Update docs on hyperparameter tuning

---
 examples/99_docs/run_hyperparameters_docs.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/99_docs/run_hyperparameters_docs.py 
b/examples/99_docs/run_hyperparameters_docs.py
index b7dfff7ac..fec76a8d7 100644
--- a/examples/99_docs/run_hyperparameters_docs.py
+++ b/examples/99_docs/run_hyperparameters_docs.py
@@ -275,7 +275,7 @@
 # :class:`~skopt.BayesSearchCV` documentation, including how to specify
 # the prior distributions of the hyperparameters.
 #
-# The Optuna searcher (:class:`~optuna_integration.sklearn.OptunaSearchCV`)\
+# The Optuna searcher (:class:`~optuna_integration.sklearn.OptunaSearchCV`)
 # uses the Optuna library to find the best hyperparameter set. Optuna is a
 # hyperparameter optimization framework that has several algorithms to find
 # the best hyperparameter set. For more information, see the
@@ -402,6 +402,7 @@
     gamma=(1e-3, 1e-1, "log-uniform"),
     class_weight=("balanced", None, "categorical")
 )
+print(creator)
 
 ###############################################################################
 # We can now use the optuna searcher with 10 trials and 3-fold cross-validation.
@@ -462,6 +463,7 @@
     C=(0.01, 10, "log-uniform"),
     gamma=(1e-3, 1e-1, "log-uniform"),
 )
+print(creator)
 
 ###############################################################################
 # While this will work for any of the ``random``, ``bayes`` or ``optuna``
@@ -481,6 +483,7 @@
     C=Real(0.01, 10, prior="log-uniform", base=2),
     gamma=(1e-3, 1e-1, "log-uniform"),
 )
+print(creator)
 
 ###############################################################################
 # For the optuna searcher, the distributions are defined using the
@@ -505,7 +508,7 @@
     C=FloatDistribution(0.01, 10, log=True),
     gamma=(1e-3, 1e-1, "log-uniform"),
 )
-
+print(creator)
 
 ###############################################################################
 #

From ed2e508c910b6ab4c9ae084c3f1004cbe79ce5e4 Mon Sep 17 00:00:00 2001
From: Fede
Date: Fri, 3 May 2024 16:19:12 +0300
Subject: [PATCH 14/16] Address comments in #262

---
 julearn/api.py                              | 15 +++++++--
 julearn/model_selection/_optuna_searcher.py |  1 +
 julearn/model_selection/_skopt_searcher.py  | 11 ++++---
 .../tests/test_available_searchers.py       | 32 ++++++++++++-------
 .../tests/test_optuna_searcher.py           |  7 ++--
 .../tests/test_skopt_searcher.py            |  9 +++---
 julearn/pipeline/pipeline_creator.py        | 15 +++++++--
 7 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/julearn/api.py b/julearn/api.py
index 7602216f1..130b57e0a 100644
--- a/julearn/api.py
+++ b/julearn/api.py
@@ -135,9 +135,18 @@ def run_cross_validation(  # noqa: C901
         Additional parameters in case Hyperparameter Tuning is performed, with
         the following keys:
 
-        * 'kind': The kind of search algorithm to use, e.g.:
-          'grid', 'random', 'bayes' or 'optuna'. Can be any valid julearn
-          searcher name or scikit-learn compatible searcher.
+        * 'kind': The kind of search algorithm to use. Valid options are:
+
+          * ``"grid"`` : :class:`~sklearn.model_selection.GridSearchCV`
+          * ``"random"`` :
+            :class:`~sklearn.model_selection.RandomizedSearchCV`
+          * ``"bayes"`` : :class:`~skopt.BayesSearchCV`
+          * ``"optuna"`` :
+            :class:`~optuna_integration.sklearn.OptunaSearchCV`
+          * user-registered searcher name : see
+            :func:`~julearn.model_selection.register_searcher`
+          * ``scikit-learn``-compatible searcher
+
         * 'cv': If a searcher is going to be used, the cross-validation
           splitting strategy to use. Defaults to same CV as for the model
           evaluation.
diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py index 04ba9bac4..a05801fc2 100644 --- a/julearn/model_selection/_optuna_searcher.py +++ b/julearn/model_selection/_optuna_searcher.py @@ -2,6 +2,7 @@ # Authors: Federico Raimondo # License: AGPL + from typing import Any, Dict from ..utils import logger diff --git a/julearn/model_selection/_skopt_searcher.py b/julearn/model_selection/_skopt_searcher.py index 3bd246a3b..8fbf53431 100644 --- a/julearn/model_selection/_skopt_searcher.py +++ b/julearn/model_selection/_skopt_searcher.py @@ -2,6 +2,7 @@ # Authors: Federico Raimondo # License: AGPL + from typing import Any, Dict from ..utils import logger @@ -35,7 +36,6 @@ def register_bayes_searcher(): _recreate_reset_copy() - def _prepare_skopt_hyperparameters_distributions( params_to_tune: Dict[str, Any], ) -> Dict[str, Any]: @@ -64,8 +64,10 @@ def _prepare_skopt_hyperparameters_distributions( if isinstance(v, tuple) and len(v) == 3: prior = v[2] if prior == "categorical": - logger.info(f"Hyperparameter {k} is categorical with 2 " - f"options: [{v[0]} and {v[1]}]") + logger.info( + f"Hyperparameter {k} is categorical with 2 " + f"options: [{v[0]} and {v[1]}]" + ) out[k] = sksp.Categorical(v[:-1]) elif isinstance(v[0], int) and isinstance(v[1], int): logger.info( @@ -75,8 +77,7 @@ def _prepare_skopt_hyperparameters_distributions( out[k] = sksp.Integer(v[0], v[1], prior=prior) elif isinstance(v[0], float) and isinstance(v[1], float): logger.info( - f"Hyperparameter {k} is {prior} float " - f"[{v[0]}, {v[1]}]" + f"Hyperparameter {k} is {prior} float " f"[{v[0]}, {v[1]}]" ) out[k] = sksp.Real(v[0], v[1], prior=prior) else: diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py index 7a7d2b913..97764c94f 100644 --- a/julearn/model_selection/tests/test_available_searchers.py +++ b/julearn/model_selection/tests/test_available_searchers.py @@ -66,19 +66,28 @@ def test_get_searcher() -> None: assert out.__name__ == "OptunaSearchCV" -def test_get_searcher_params_attr() -> None: - """Test getting the params attribute of a searcher.""" - out = get_searcher_params_attr("grid") - assert out == "param_grid" - - out = get_searcher_params_attr("random") - assert out == "param_distributions" +@pytest.mark.parametrize( + "searcher,expected", + [ + ("grid", "param_grid"), + ("random", "param_distributions"), + ("bayes", "search_spaces"), + ("optuna", "param_distributions"), + ], +) +def test_get_searcher_params_attr(searcher: str, expected: str) -> None: + """Test getting the params attribute of a searcher. - out = get_searcher_params_attr("bayes") - assert out == "search_spaces" + Parameters + ---------- + searcher : str + The searcher name. + expected : str + The expected attribute name. 
- out = get_searcher_params_attr("optuna") - assert out == "param_distributions" + """ + out = get_searcher_params_attr(searcher) + assert out == expected @pytest.mark.nodeps @@ -88,6 +97,7 @@ def test_get_searchers_noskopt() -> None: with pytest.raises(ImportError, match="BayesSearchCV requires"): out() # type: ignore + @pytest.mark.nodeps def test_get_searchers_nooptuna() -> None: """Test getting a searcher without optuna.""" diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py index 0e0014a83..ab9b7fa11 100644 --- a/julearn/model_selection/tests/test_optuna_searcher.py +++ b/julearn/model_selection/tests/test_optuna_searcher.py @@ -2,7 +2,8 @@ # Authors: Federico Raimondo # License: AGPL -from typing import Dict + +from typing import Any, Dict, Tuple import pytest @@ -69,8 +70,8 @@ ], ) def test__prepare_optuna_hyperparameters_distributions( - params_to_tune: Dict[str, Dict[str, tuple]], - expected_types: tuple, + params_to_tune: Dict[str, Dict[str, Any]], + expected_types: Tuple, expected_dist: tuple, ) -> None: """Test the _prepare_optuna_hyperparameters_distributions function. diff --git a/julearn/model_selection/tests/test_skopt_searcher.py b/julearn/model_selection/tests/test_skopt_searcher.py index be9a9321b..ea3598a87 100644 --- a/julearn/model_selection/tests/test_skopt_searcher.py +++ b/julearn/model_selection/tests/test_skopt_searcher.py @@ -2,7 +2,8 @@ # Authors: Federico Raimondo # License: AGPL -from typing import Dict + +from typing import Any, Dict, Tuple import pytest @@ -69,9 +70,9 @@ ], ) def test__prepare_skopt_hyperparameters_distributions( - params_to_tune: Dict[str, Dict[str, tuple]], - expected_types: tuple, - expected_dist: tuple, + params_to_tune: Dict[str, Dict[str, Any]], + expected_types: Tuple, + expected_dist: Tuple, ) -> None: """Test the _prepare_skopt_hyperparameters_distributions function. diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index d5616e255..979bf828a 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -933,9 +933,18 @@ def _prepare_hyperparameter_tuning( search_params : dict The parameters for the search. The following keys are accepted: - * 'kind': The kind of search algorithm to use e.g.: - 'grid', 'random', 'bayes' or 'optuna'. All valid julearn searchers - can be entered. + * 'kind': The kind of search algorithm to use. Valid options are: + + * ``"grid"`` : :class:`~sklearn.model_selection.GridSearchCV` + * ``"random"`` : + :class:`~sklearn.model_selection.RandomizedSearchCV` + * ``"bayes"`` : :class:`~skopt.BayesSearchCV` + * ``"optuna"`` : + :class:`~optuna_integration.sklearn.OptunaSearchCV` + * user-registered searcher name : see + :func:`~julearn.model_selection.register_searcher` + * ``scikit-learn``-compatible searcher + * 'cv': If search is going to be used, the cross-validation splitting strategy to use. Defaults to same CV as for the model evaluation. 
From 2b469cff31c317ee534e18a6b283c75803fbf5f2 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 17:04:06 +0300 Subject: [PATCH 15/16] Address comments in #262 --- docs/changes/newsfragments/262.doc | 2 +- .../tests/test_available_searchers.py | 31 ++++++++++++------- .../tests/test_optuna_searcher.py | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/docs/changes/newsfragments/262.doc b/docs/changes/newsfragments/262.doc index ce5b75c38..067b3bf0d 100644 --- a/docs/changes/newsfragments/262.doc +++ b/docs/changes/newsfragments/262.doc @@ -1 +1 @@ -Update documentation on Hyperparameter Tuning by `Fede Raimondo_` \ No newline at end of file +Update documentation on Hyperparameter Tuning by `Fede Raimondo`_ \ No newline at end of file diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py index 97764c94f..17386fb91 100644 --- a/julearn/model_selection/tests/test_available_searchers.py +++ b/julearn/model_selection/tests/test_available_searchers.py @@ -51,19 +51,28 @@ def test_reset_searcher() -> None: get_searcher("custom_grid") -def test_get_searcher() -> None: - """Test getting a searcher.""" - out = get_searcher("grid") - assert out == GridSearchCV - - out = get_searcher("random") - assert out == RandomizedSearchCV +@pytest.mark.parametrize( + "searcher,expected", + [ + ("grid", "GridSearchCV"), + ("random", "RandomizedSearchCV"), + ("bayes", "BayesSearchCV"), + ("optuna", "OptunaSearchCV"), + ], +) +def test_get_searcher(searcher: str, expected: str) -> None: + """Test getting a searcher. - out = get_searcher("bayes") - assert out.__name__ == "BayesSearchCV" + Parameters + ---------- + searcher : str + The searcher name. + expected : str + The expected searcher class name. - out = get_searcher("optuna") - assert out.__name__ == "OptunaSearchCV" + """ + out = get_searcher(searcher) + assert out.__name__ == expected @pytest.mark.parametrize( diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py index ab9b7fa11..3d0c30db2 100644 --- a/julearn/model_selection/tests/test_optuna_searcher.py +++ b/julearn/model_selection/tests/test_optuna_searcher.py @@ -72,7 +72,7 @@ def test__prepare_optuna_hyperparameters_distributions( params_to_tune: Dict[str, Dict[str, Any]], expected_types: Tuple, - expected_dist: tuple, + expected_dist: Tuple, ) -> None: """Test the _prepare_optuna_hyperparameters_distributions function. From 721d8e6a3cebd9a34f4396a74017be8eff752597 Mon Sep 17 00:00:00 2001 From: Fede Date: Fri, 3 May 2024 17:04:25 +0300 Subject: [PATCH 16/16] Fix linter --- julearn/model_selection/tests/test_available_searchers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py index 17386fb91..5b3fae4f4 100644 --- a/julearn/model_selection/tests/test_available_searchers.py +++ b/julearn/model_selection/tests/test_available_searchers.py @@ -5,7 +5,7 @@ # License: AGPL import pytest -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV +from sklearn.model_selection import GridSearchCV from julearn.model_selection import ( get_searcher,
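
Taken together, the final helpers convert the julearn tuple convention into searcher-native distribution objects. A quick sketch of the optuna mapping (assumes ``optuna`` and ``optuna-integration`` are installed; the parameter names are illustrative):

    from julearn.model_selection._optuna_searcher import (
        _prepare_optuna_hyperparameters_distributions,
    )

    params = {
        # two floats -> FloatDistribution(0.01, 10.0, log=True)
        "svm__C": (0.01, 10.0, "log-uniform"),
        # two ints -> IntDistribution(2, 4, log=False)
        "select_k__k": (2, 4, "uniform"),
        # -> CategoricalDistribution(("balanced", None))
        "svm__class_weight": ("balanced", None, "categorical"),
    }
    print(_prepare_optuna_hyperparameters_distributions(params))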