
Commit

Address comments
fraimondo committed Apr 29, 2024
1 parent 206134e commit e5171f8
Showing 8 changed files with 64 additions and 58 deletions.
2 changes: 1 addition & 1 deletion docs/changes/newsfragments/260.misc
@@ -1 +1 @@
Add ``all`` as optional dependencies to install all functional dependencies by `Fede Raimondo`_.
Add ``all`` as optional dependencies to install all functional dependencies by `Fede Raimondo`_
2 changes: 1 addition & 1 deletion docs/getting_started.rst
@@ -87,7 +87,7 @@ The following optional dependencies are available:
* ``viz``: Visualization tools for ``julearn``. This includes the
:mod:`.viz` module.
* ``deslib``: The :mod:`.dynamic` module requires the `deslib`_ package. This
module is not compatible with newer python versions and its unmaintained.
module is not compatible with newer Python versions and it is unmaintained.
* ``skopt``: Using the ``"bayes"`` searcher (:class:`~skopt.BayesSearchCV`)
requires the `scikit-optimize`_ package.
* ``all``: Install all optional functional dependencies (except ``deslib``).
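For example, assuming a standard install from PyPI, the extras are requested together with the package itself, e.g. ``pip install "julearn[all]"``, or ``pip install "julearn[viz,skopt]"`` to pick individual extras.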
26 changes: 13 additions & 13 deletions examples/99_docs/run_hyperparameters_docs.py
@@ -245,14 +245,14 @@
#
# By default, ``julearn`` uses a
# :class:`~sklearn.model_selection.GridSearchCV`.
# This searcher, specified as `"grid"` is very simple. First, it constructs the
# _grid_ of hyperparameters to try. As we see above, we have 3 hyperparameters
# to tune. So it constructs a 3-dimentional grid with all the possible
# combinations of the hyperparameters values. The second step is to perform
# cross-validation on each of the possible combinations of hyperparameters
# values.
# This searcher, specified as ``"grid"``, is very simple. First, it constructs
# the *grid* of hyperparameters to try. As we see above, we have 3
# hyperparameters to tune. So it constructs a 3-dimensional grid with all the
# possible combinations of the hyperparameter values. The second step is to
# perform cross-validation on each of the possible combinations of
# hyperparameter values.
#
# Another searchers that ``julearn`` provides are the
# Other searchers that ``julearn`` provides are the
# :class:`~sklearn.model_selection.RandomizedSearchCV` and
# :class:`~skopt.BayesSearchCV`.
#
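To make the size of that grid concrete, here is a minimal standalone sketch (standard library only, with made-up hyperparameter values) of how three lists of candidate values expand into the combinations that the grid searcher cross-validates:

    from itertools import product

    # Hypothetical candidate values for three hyperparameters.
    C_values = [0.1, 1, 10]
    gamma_values = [1e-3, 1e-2]
    kernel_values = ["rbf", "linear"]

    # The "grid" searcher runs one cross-validation per point of this grid.
    grid = list(product(C_values, gamma_values, kernel_values))
    print(len(grid))  # 3 * 2 * 2 = 12 combinations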
@@ -268,11 +268,11 @@
#
# The Bayesian searcher (:class:`~skopt.BayesSearchCV`) is a bit more
# complex. It uses Bayesian optimization to find the best hyperparameter set.
# As with the randomized search, it is useful when we have a many
# As with the randomized search, it is useful when we have many
# hyperparameters to tune, and we don't want to try all the possible
# combinations due to computational constraints. For more information, see the
# :class:`~skopt.BayesSearchCV` documentation, including how to specify
# the distributions of the hyperparameters.
# the prior distributions of the hyperparameters.
#
# We can specify the kind of searcher and its parametrization, by setting the
# ``search_params`` parameter in the :func:`.run_cross_validation` function.
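For reference, a minimal sketch of what such a searcher looks like when ``skopt`` is used directly, outside of ``julearn`` (the pipeline and bounds are illustrative):

    from skopt import BayesSearchCV
    from skopt.space import Real
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    # The search space is given as prior distributions rather than an explicit
    # grid; n_iter bounds how many hyperparameter sets are cross-validated.
    search = BayesSearchCV(
        make_pipeline(StandardScaler(), SVC()),
        search_spaces={
            "svc__C": Real(1e-2, 1e2, prior="log-uniform"),
            "svc__gamma": Real(1e-4, 1e-1, prior="log-uniform"),
        },
        n_iter=20,
        cv=5,
    )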
@@ -307,10 +307,10 @@
# Furthermore, the :class:`~sklearn.model_selection.RandomizedSearchCV`
# searcher can sample hyperparameters from distributions, which can be useful
# when we have continuous hyperparameters.
# Let's set both C and gamma to be sampled from log-uniform distributions. We
# can do this by setting the hyperparameter values as a tuple with the
# following format: ``(low, high, distribution)``. The distribution can be
# either ``"log-uniform"`` or ``"uniform"``.
# Let's set both ``C`` and ``gamma`` to be sampled from log-uniform
# distributions. We can do this by setting the hyperparameter values as a
# tuple with the following format: ``(low, high, distribution)``. The
# distribution can be either ``"log-uniform"`` or ``"uniform"``.

creator = PipelineCreator(problem_type="classification")
creator.add("zscore")
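A sketch of the tuple format described above, continued to a full call. The data loading, model and bounds here are illustrative (assuming the iris example data used elsewhere in the docs), not taken from the collapsed part of this example:

    from seaborn import load_dataset
    from julearn import run_cross_validation
    from julearn.pipeline import PipelineCreator

    df = load_dataset("iris")
    X = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    y = "species"

    creator = PipelineCreator(problem_type="classification")
    creator.add("zscore")
    # (low, high, distribution) tuples are sampled instead of enumerated.
    creator.add(
        "svm",
        C=(0.01, 100, "log-uniform"),
        gamma=(1e-4, 1e-1, "log-uniform"),
    )

    scores = run_cross_validation(
        X=X, y=y, data=df, model=creator,
        search_params={"kind": "random", "n_iter": 10},
    )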
8 changes: 4 additions & 4 deletions julearn/api.py
@@ -316,7 +316,7 @@ def run_cross_validation( # noqa: C901
"Cannot use model_params with a model object. Use either "
"a string or a PipelineCreator"
)
pipeline_creator.add(step=model, **t_params) # pyright: ignore
pipeline_creator.add(step=model, **t_params) # type: ignore

# Check for extra model_params that are not used
unused_params = []
@@ -357,11 +357,11 @@ def run_cross_validation( # noqa: C901

# Prepare cross validation
cv_outer = check_cv(
cv, classifier=problem_type == "classification" # pyright: ignore
cv, classifier=problem_type == "classification" # type: ignore
)
logger.info(f"Using outer CV scheme {cv_outer}")

check_consistency(df_y, cv, groups, problem_type) # pyright: ignore
check_consistency(df_y, cv, groups, problem_type) # type: ignore

cv_return_estimator = return_estimator in ["cv", "all"]
scoring = check_scoring(pipeline, scoring, wrap_score=wrap_score)
@@ -381,7 +381,7 @@ def run_cross_validation( # noqa: C901
return_estimator=cv_return_estimator,
n_jobs=n_jobs,
return_train_score=return_train_score,
verbose=verbose, # pyright: ignore
verbose=verbose, # type: ignore
fit_params=fit_params,
)

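As a side note on the scikit-learn helper wrapped here, a small standalone sketch of what ``check_cv`` resolves an integer ``cv`` into (illustrative labels):

    import numpy as np
    from sklearn.model_selection import check_cv

    y = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])

    # With class labels and classifier=True the integer becomes a stratified
    # splitter; otherwise a plain KFold is returned.
    print(check_cv(5, y, classifier=True))   # StratifiedKFold(n_splits=5, ...)
    print(check_cv(5, y, classifier=False))  # KFold(n_splits=5, ...)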
30 changes: 23 additions & 7 deletions julearn/conftest.py
@@ -18,22 +18,27 @@
}


def pytest_configure(config: pytest.Config):
def pytest_configure(config: pytest.Config) -> None:
"""Add a new marker to pytest.
Parameters

Check failure on line 24 in julearn/conftest.py (GitHub Actions / lint): Ruff (D413) julearn/conftest.py:24:5: D413 Missing blank line after last section ("Parameters")
----------
config : pytest.Config
The pytest configuration object.
"""
# register your new marker to avoid warnings
for k, v in _filter_keys.items():
config.addinivalue_line("markers", f"{k}: {v}")


def pytest_addoption(parser):
"""Add a new filter option to pytest."""
def pytest_addoption(parser: pytest.Parser) -> None:
"""Add a new filter option to pytest.
Parameters

Check failure on line 37 in julearn/conftest.py (GitHub Actions / lint): Ruff (D413) julearn/conftest.py:37:5: D413 Missing blank line after last section ("Parameters")
----------
parser : pytest.Parser
The pytest parser object.
"""
# add your new filter option (you can name it whatever you want)
parser.addoption(
"--filter",
@@ -42,9 +42,20 @@ def pytest_addoption(parser):
)


def pytest_collection_modifyitems(config, items):
"""Filter tests based on the key marker."""
filter = config.getoption("--filter", None)
def pytest_collection_modifyitems(
config: pytest.Config, items: List[pytest.Item]
) -> None:
"""Filter tests based on the key marker.
Parameters
----------
config : pytest.Config
The pytest configuration object.
items : list
The list of items.
"""
filter = config.getoption("--filter", None) # type: ignore
if filter is None:
for k in _filter_keys.keys():
skip_keys = mark.skip(
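The Ruff D413 failures flagged above are purely about docstring layout; a compliant version of the first hook's docstring just needs a blank line after the ``Parameters`` section (and, per numpydoc convention, one after the summary line), as in this sketch:

    import pytest


    def pytest_configure(config: pytest.Config) -> None:
        """Add a new marker to pytest.

        Parameters
        ----------
        config : pytest.Config
            The pytest configuration object.

        """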
2 changes: 1 addition & 1 deletion julearn/model_selection/tests/test_available_searchers.py
@@ -80,4 +80,4 @@ def test_get_searchers_noskopt() -> None:
"""Test getting a searcher without skopt."""
out = get_searcher("bayes")
with pytest.raises(ImportError, match="BayesSearchCV requires"):
out() # pyright: ignore
out() # type: ignore
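For context on why the error surfaces at ``out()`` rather than at ``get_searcher("bayes")``: the registry can return a lightweight placeholder that only fails on instantiation. A hypothetical sketch of that pattern (not julearn's actual class):

    class _MissingBayesSearchCV:
        """Stand-in returned when scikit-optimize is not installed."""

        def __init__(self, *args, **kwargs):
            raise ImportError(
                "BayesSearchCV requires the scikit-optimize (skopt) package."
            )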
24 changes: 12 additions & 12 deletions julearn/pipeline/merger.py
@@ -50,7 +50,7 @@ def merge_pipelines(
)

if isinstance(p, BaseSearchCV):
if not isinstance(p, t_searcher): # pyright: ignore
if not isinstance(p, t_searcher): # type: ignore
raise_error(
"One of the pipelines to merge is a "
f"{p.__class__.__name__}, but the search params specify a "
@@ -60,16 +60,16 @@
# Check that all estimators have the same named steps in their pipelines.
reference_pipeline = pipelines[0]
if isinstance(reference_pipeline, BaseSearchCV):
reference_pipeline = reference_pipeline.estimator # pyright: ignore
reference_pipeline = reference_pipeline.estimator # type: ignore

step_names = reference_pipeline.named_steps.keys() # pyright: ignore
step_names = reference_pipeline.named_steps.keys() # type: ignore

for p in pipelines:
if isinstance(p, BaseSearchCV):
p = p.estimator # pyright: ignore
p = p.estimator # type: ignore
if not isinstance(p, Pipeline):
raise_error("All searchers must use a pipeline.")
if step_names != p.named_steps.keys(): # pyright: ignore
if step_names != p.named_steps.keys(): # type: ignore
raise_error("All pipelines must have the same named steps.")

# The idea behind the merge is to create a list of parameter
@@ -83,20 +83,20 @@
different_steps = []
for t_step_name in step_names:
# Get the transformer/model of the first element
t = reference_pipeline.named_steps[t_step_name] # pyright: ignore
t = reference_pipeline.named_steps[t_step_name] # type: ignore

# Check that all searchers have the same transformer/model.
# TODO: Fix this comparison, as it always returns False.
for s in pipelines[1:]:
if isinstance(s, BaseSearchCV):
if (
s.estimator.named_steps[t_step_name] # pyright: ignore
s.estimator.named_steps[t_step_name] # type: ignore
!= t
):
different_steps.append(t_step_name)
break
else:
if s.named_steps[t_step_name] != t: # pyright: ignore
if s.named_steps[t_step_name] != t: # type: ignore
different_steps.append(t_step_name)
break

@@ -109,7 +109,7 @@
if params_attr is None:
raise_error(
f"Searcher {s.__class__.__name__} is not registered "
"in the searcher registry. Merging of this kinds of "
"in the searcher registry. Merging of these kinds of "
"searchers is not supported. If you register the "
"searcher, you can merge it."
)
@@ -119,14 +119,14 @@
for t_name in different_steps:
if isinstance(s, BaseSearchCV):
t_grid[t_name] = [
s.estimator.named_steps[t_name] # pyright: ignore
s.estimator.named_steps[t_name] # type: ignore
]
else:
t_grid[t_name] = [s.named_steps[t_name]] # pyright: ignore
t_grid[t_name] = [s.named_steps[t_name]] # type: ignore
all_grids.append(t_grid)

# Finally, we will concatenate the grids and create a new searcher.
new_searcher = _prepare_hyperparameter_tuning(
all_grids, search_params, reference_pipeline # pyright: ignore
all_grids, search_params, reference_pipeline # type: ignore
)
return new_searcher
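The merge described above ultimately relies on scikit-learn searchers accepting a list of parameter grids; a small standalone sketch of that mechanism (plain scikit-learn, illustrative values):

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    pipe = Pipeline([("scaler", StandardScaler()), ("model", SVC())])

    # Two searchers that share the same named steps but differ in one step can
    # be expressed as a single searcher over a list of grids: each dict pins
    # the differing step to one estimator and lists its hyperparameters.
    merged_grid = [
        {"model": [SVC()], "model__C": [0.1, 1, 10]},
        {"model": [RandomForestClassifier()], "model__n_estimators": [100, 500]},
    ]
    search = GridSearchCV(pipe, merged_grid, cv=5)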
28 changes: 9 additions & 19 deletions julearn/pipeline/tests/test_pipeline_creator.py
@@ -217,11 +217,10 @@ def test_hyperparameter_tuning(
if kind == "grid":
assert isinstance(pipeline, GridSearchCV)
assert pipeline.param_grid == param_grid # type: ignore
elif kind == "random":
else:
assert kind == "random"
assert isinstance(pipeline, RandomizedSearchCV)
assert pipeline.param_distributions == param_grid # type: ignore
else:
raise ValueError(f"Unknown kind {kind}")


def test_hyperparameter_tuning_bayes(
@@ -250,10 +250,7 @@ def test_hyperparameter_tuning_bayes(
The parameters for the search.
"""
try:
from skopt import BayesSearchCV
except ImportError:
pytest.skip("skopt not installed")
BayesSearchCV = pytest.importorskip("skopt").BayesSearchCV

pipeline, param_grid = _hyperparam_tuning_base_test(
X_types_iris,
@@ -327,8 +323,8 @@
kind = "grid"
if search_params is not None:
kind = search_params.get("kind", "grid")
if kind == "grid":
return # No sense to test distributions for grid search
if kind != "random":
return  # Distributions only make sense for the random searcher

pipeline, param_grid = _hyperparam_tuning_base_test(
X_types_iris,
@@ -339,12 +335,9 @@
search_params,
)

if kind == "random":
assert isinstance(pipeline, RandomizedSearchCV)
_compare_param_grids(pipeline.param_distributions, # type: ignore
param_grid)
else:
raise ValueError(f"Unknown kind {kind}")
assert isinstance(pipeline, RandomizedSearchCV)
_compare_param_grids(pipeline.param_distributions, # type: ignore
param_grid)


def test_hyperparameter_tuning_distributions_bayes(
@@ -373,10 +366,7 @@ def test_hyperparameter_tuning_distributions_bayes(
The parameters for the search.
"""
try:
from skopt import BayesSearchCV
except ImportError:
pytest.skip("skopt not installed")
BayesSearchCV = pytest.importorskip("skopt").BayesSearchCV

pipeline, param_grid = _hyperparam_tuning_base_test(
X_types_iris,
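On the ``importorskip`` pattern used in these two tests: ``pytest.importorskip`` takes a module name and returns the imported module, so the class has to be taken off the returned module. A minimal sketch of the idiom:

    import pytest

    # Skip the test early if scikit-optimize is missing; otherwise grab the
    # class from the imported module.
    skopt = pytest.importorskip("skopt")
    BayesSearchCV = skopt.BayesSearchCV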
