Add optuna searcher
fraimondo committed May 3, 2024
1 parent f7fc6fe commit b72335c
Showing 4 changed files with 212 additions and 7 deletions.
38 changes: 34 additions & 4 deletions julearn/model_selection/_optuna_searcher.py
@@ -4,6 +4,7 @@
# License: AGPL
from typing import Any, Dict

from ..utils import logger
from .available_searchers import _recreate_reset_copy, register_searcher


@@ -38,12 +39,14 @@ def register_optuna_searcher():
def _prepare_optuna_hyperparameters_distributions(
params_to_tune: Dict[str, Any],
) -> Dict[str, Any]:
"""Prepare hyperparameters distributions for RandomizedSearchCV.
"""Prepare hyperparameters distributions for OptunaSearchCV.
This method replaces tuples with distributions for RandomizedSearchCV
This method replaces tuples with distributions for OptunaSearchCV
following the skopt convention. That is, if a parameter is a tuple
with 3 elements, the first two elements are the bounds of the
distribution and the third element is the type of distribution.
distribution and the third element is the type of distribution. In case
the last element is "categorical", the parameter is considered
categorical and all the previous elements are the choices.
Parameters
----------
@@ -61,16 +64,43 @@
if isinstance(v, tuple) and len(v) == 3:
if v[2] == "uniform":
if isinstance(v[0], int) and isinstance(v[1], int):
logger.info(
f"Hyperparameter {k} is uniform integer "
f"[{v[0]}, {v[1]}]"
)
out[k] = optd.IntDistribution(v[0], v[1], log=False)
else:
logger.info(
f"Hyperparameter {k} is uniform float [{v[0]}, {v[1]}]"
)
out[k] = optd.FloatDistribution(v[0], v[1], log=False)
elif v[2] in ("loguniform", "log-uniform"):
elif v[2] == "log-uniform":
if isinstance(v[0], int) and isinstance(v[1], int):
logger.info(
f"Hyperparameter {k} is log-uniform int "
f"[{v[0]}, {v[1]}]"
)
out[k] = optd.IntDistribution(v[0], v[1], log=True)
else:
logger.info(
f"Hyperparameter {k} is log-uniform float "
f"[{v[0]}, {v[1]}]"
)
out[k] = optd.FloatDistribution(v[0], v[1], log=True)
elif v[2] == "categorical":
logger.info(f"Hyperparameter {k} is categorical with 2 "
f"options: [{v[0]} and {v[1]}]")
out[k] = optd.CategoricalDistribution((v[0], v[1]))
else:
out[k] = v
elif (
isinstance(v, tuple)
and isinstance(v[-1], str)
and v[-1] == "categorical"
):
logger.info(f"Hyperparameter {k} is categorical [{v[:-1]}]")
out[k] = optd.CategoricalDistribution(v[:-1])
else:
logger.info(f"Hyperparameter {k} as is {v}")
out[k] = v
return out
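
For illustration, a minimal sketch of the tuple conventions this helper converts, assuming optuna is installed and julearn is importable; the hyperparameter names are made up for the example:

import optuna.distributions as optd

from julearn.model_selection._optuna_searcher import (
    _prepare_optuna_hyperparameters_distributions,
)

# (low, high, kind) tuples follow the skopt-style convention from the
# docstring; a trailing "categorical" turns the preceding elements into
# the choices.
params = {
    "C": (0.01, 10.0, "log-uniform"),
    "n_components": (1, 20, "uniform"),
    "kernel": ("linear", "rbf", "poly", "categorical"),
}

dists = _prepare_optuna_hyperparameters_distributions(params)
assert isinstance(dists["C"], optd.FloatDistribution) and dists["C"].log
assert isinstance(dists["n_components"], optd.IntDistribution)
assert dists["kernel"].choices == ("linear", "rbf", "poly")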
163 changes: 163 additions & 0 deletions julearn/model_selection/tests/test_optuna_searcher.py
@@ -0,0 +1,163 @@
"""Provides tests for the optuna searcher."""

# Authors: Federico Raimondo <[email protected]>
# License: AGPL
from typing import Dict

import optuna.distributions as optd
import pytest

from julearn.model_selection._optuna_searcher import (
_prepare_optuna_hyperparameters_distributions,
)


@pytest.mark.parametrize(
"params_to_tune,expected_types, expected_dist",
[
(
{
"n_components": (0.2, 0.7, "uniform"),
"n_neighbors": (1.0, 10.0, "log-uniform"),
},
("float", "float"),
("uniform", "log-uniform"),
),
(
{
"n_components": (1, 20, "uniform"),
"n_neighbors": (1, 10, "log-uniform"),
},
("int", "int"),
("uniform", "log-uniform"),
),
(
{
"options": (True, False, "categorical"),
"more_options": ("a", "b", "c", "d", "categorical"),
},
(None, None),
("categorical", "categorical"),
),
(
{
"n_components": optd.FloatDistribution(0.2, 0.7, log=False),
"n_neighbors": optd.FloatDistribution(1.0, 10.0, log=True),
},
("float", "float"),
("uniform", "log-uniform"),
),
(
{
"n_components": optd.IntDistribution(1, 20, log=False),
"n_neighbors": optd.IntDistribution(1, 10, log=True),
},
("int", "int"),
("uniform", "log-uniform"),
),
(
{
"options": optd.CategoricalDistribution([True, False]),
"more_options": optd.CategoricalDistribution(
("a", "b", "c", "d"),
),
},
(None, None),
("categorical", "categorical"),
),
],
)
def test__prepare_optuna_hyperparameters_distributions(
params_to_tune: Dict[str, Dict[str, tuple]],
expected_types: tuple,
expected_dist: tuple,
) -> None:
"""Test the _prepare_optuna_hyperparameters_distributions function.
Parameters
----------
params_to_tune : dict
The parameters to tune.
expected_types : tuple
The expected types of each parameter.
expected_dist : tuple
The expected distributions of each parameter.
"""
new_params = _prepare_optuna_hyperparameters_distributions(params_to_tune)
for i, (k, v) in enumerate(new_params.items()):
if expected_dist[i] == "uniform":
if expected_types[i] == "int":
assert isinstance(v, optd.IntDistribution)
assert not v.log
if isinstance(params_to_tune[k], tuple):
assert v.low == params_to_tune[k][0] # type: ignore
assert v.high == params_to_tune[k][1] # type: ignore
else:
assert isinstance(params_to_tune[k], optd.IntDistribution)
assert v.low == params_to_tune[k].low # type: ignore
assert v.high == params_to_tune[k].high # type: ignore
assert not params_to_tune[k].log # type: ignore
else:
assert isinstance(v, optd.FloatDistribution)
assert not v.log
if isinstance(params_to_tune[k], tuple):
assert v.low == params_to_tune[k][0] # type: ignore
assert v.high == params_to_tune[k][1] # type: ignore
else:
assert isinstance(
params_to_tune[k], optd.FloatDistribution
)
assert v.low == params_to_tune[k].low # type: ignore
assert v.high == params_to_tune[k].high # type: ignore
assert not params_to_tune[k].log # type: ignore
elif expected_dist[i] == "log-uniform":
if expected_types[i] == "int":
assert isinstance(v, optd.IntDistribution)
assert v.log
if isinstance(params_to_tune[k], tuple):
assert v.low == params_to_tune[k][0] # type: ignore
assert v.high == params_to_tune[k][1] # type: ignore
else:
assert isinstance(params_to_tune[k], optd.IntDistribution)
assert v.low == params_to_tune[k].low # type: ignore
assert v.high == params_to_tune[k].high # type: ignore
assert params_to_tune[k].log # type: ignore
else:
assert isinstance(v, optd.FloatDistribution)
assert v.log
if isinstance(params_to_tune[k], tuple):
assert v.low == params_to_tune[k][0] # type: ignore
assert v.high == params_to_tune[k][1] # type: ignore
else:
assert isinstance(
params_to_tune[k], optd.FloatDistribution
)
assert v.low == params_to_tune[k].low # type: ignore
assert v.high == params_to_tune[k].high # type: ignore
assert params_to_tune[k].log # type: ignore
elif expected_dist[i] == "categorical":
assert isinstance(v, optd.CategoricalDistribution)
if isinstance(params_to_tune[k], tuple):
assert all(
x in v.choices
for x in params_to_tune[k][:-1] # type: ignore
)
assert all(
x in params_to_tune[k][:-1] # type: ignore
for x in v.choices
)
else:
assert isinstance(
params_to_tune[k], optd.CategoricalDistribution
)
assert all(
x in v.choices
for x in params_to_tune[k].choices # type: ignore
)
assert all(
x in params_to_tune[k].choices # type: ignore
for x in v.choices
)
else:
pytest.fail("Invalid distribution type")
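
The new suite can be run on its own with pytest (assuming a development install of julearn with optuna available):

pytest julearn/model_selection/tests/test_optuna_searcher.py -v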
16 changes: 14 additions & 2 deletions julearn/pipeline/pipeline_creator.py
@@ -17,6 +17,9 @@
from ..model_selection._optuna_searcher import (
_prepare_optuna_hyperparameters_distributions,
)
from ..model_selection._skopt_searcher import (
_prepare_skopt_hyperparameters_distributions,
)
from ..model_selection.available_searchers import get_searcher, list_searchers
from ..models import get_model, list_models
from ..prepare import prepare_search_params
@@ -901,7 +904,7 @@ def _prepare_hyperparameters_distributions(
if isinstance(v, tuple) and len(v) == 3:
if v[2] == "uniform":
mod_params_to_tune[k] = stats.uniform(v[0], v[1])
elif v[2] in ("loguniform", "log-uniform"):
elif v[2] == "log-uniform":
mod_params_to_tune[k] = stats.loguniform(v[0], v[1])
else:
mod_params_to_tune[k] = v
@@ -998,8 +1001,17 @@ def _prepare_hyperparameter_tuning(
_prepare_hyperparameters_distributions(p)
for p in params_to_tune
]
elif search.__name__ == "BayesSearchCV":
if isinstance(params_to_tune, dict):
params_to_tune = _prepare_skopt_hyperparameters_distributions(
params_to_tune
)
else:
params_to_tune = [
_prepare_skopt_hyperparameters_distributions(p)
for p in params_to_tune
]
elif search.__name__ == "OptunaSearchCV":

if isinstance(params_to_tune, dict):
params_to_tune = _prepare_optuna_hyperparameters_distributions(
params_to_tune
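
For context, a hedged end-to-end sketch of how the tuple convention reaches the new searcher from user code. It assumes the searcher is registered under the kind "optuna" (via register_optuna_searcher above) and that search_params follows julearn's usual {"kind": ...} form; the toy data and column names are illustrative:

import pandas as pd
from sklearn.datasets import make_classification

from julearn import run_cross_validation
from julearn.pipeline import PipelineCreator

# Toy data, just to make the sketch self-contained.
X_arr, y_arr = make_classification(
    n_samples=50, n_features=2, n_informative=2, n_redundant=0,
    random_state=42,
)
df = pd.DataFrame(X_arr, columns=["feat_1", "feat_2"])
df["target"] = y_arr

creator = PipelineCreator(problem_type="classification")
creator.add("zscore")
creator.add(
    "svm",
    C=(0.01, 10.0, "log-uniform"),  # -> FloatDistribution(log=True)
    kernel=("linear", "rbf", "categorical"),  # -> CategoricalDistribution
)

scores = run_cross_validation(
    X=["feat_1", "feat_2"],
    y="target",
    data=df,
    model=creator,
    search_params={"kind": "optuna"},  # dispatched to OptunaSearchCV above
)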
2 changes: 1 addition & 1 deletion julearn/pipeline/tests/test_pipeline_creator.py
@@ -321,7 +321,7 @@ def _compare_param_grids(a: Dict, b: Dict) -> None:
if hasattr(val, "rvs"):
assert val.args[0] == b[key][0]
assert val.args[1] == b[key][1]
- if b[key][2] in ["log-uniform", "loguniform"]:
+ if b[key][2] == "log-uniform":
assert val.dist.name == "loguniform"
elif b[key][2] == "uniform":
assert val.dist.name == "uniform"
