Add scikit-learn-contrib/stability-selection and improve randomized_l1 transformers #308

Open · wants to merge 7 commits into base: master
4 changes: 4 additions & 0 deletions docs/source/libraries/sklearn.rst
@@ -261,6 +261,8 @@ Currently the following transformers are supported out of the box:
* SelectorMixin-based transformers: SelectPercentile_,
  SelectKBest_, GenericUnivariateSelect_, VarianceThreshold_,
  RFE_, RFECV_, SelectFromModel_, RandomizedLogisticRegression_;
* stability selection-based transformers: RandomizedLogisticRegression_,
  RandomizedLasso_, StabilitySelection_ (see the usage sketch below);
* scalers from sklearn.preprocessing: MinMaxScaler_, StandardScaler_,
  MaxAbsScaler_, RobustScaler_.

@@ -276,6 +278,8 @@ Currently the following transformers are supported out of the box:
.. _RFECV: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html
.. _VarianceThreshold: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
.. _RandomizedLogisticRegression: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html
.. _RandomizedLasso: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLasso.html
.. _StabilitySelection: https://github.com/scikit-learn-contrib/stability-selection
.. _Pipeline: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline
.. _singledispatch: https://pypi.python.org/pypi/singledispatch
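For context, a minimal usage sketch (not part of this diff) of how feature names flow through one of these selectors. The feature names are illustrative assumptions, and the default StabilitySelection parameters may need adjusting depending on library versions:

# Hedged sketch: transform_feature_names with a stability selection
# transformer.  Feature names and parameters are illustrative only.
from sklearn.datasets import load_iris
from stability_selection import StabilitySelection  # scikit-learn-contrib package

from eli5 import transform_feature_names

X, y = load_iris(return_X_y=True)
selector = StabilitySelection(random_state=42).fit(X, y)

# Returns the names of the features the selector kept, i.e. some subset
# of the four (made-up) iris column names below.
print(transform_feature_names(
    selector, ['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid']))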

43 changes: 33 additions & 10 deletions eli5/sklearn/transform.py
@@ -4,6 +4,21 @@
import numpy as np # type: ignore
from sklearn.pipeline import Pipeline, FeatureUnion # type: ignore
from sklearn.feature_selection.base import SelectorMixin # type: ignore
try:
    from sklearn.linear_model import (  # type: ignore
        RandomizedLogisticRegression,
        RandomizedLasso,
    )
except ImportError:
    # randomized_l1 feature selectors are not available (removed in scikit-learn 0.21)
    RandomizedLogisticRegression = None
    RandomizedLasso = None
try:
    from stability_selection import StabilitySelection  # type: ignore
    # TODO: add support for stability_selection.RandomizedLogisticRegression and stability_selection.RandomizedLasso ?
except ImportError:
    # scikit-learn-contrib/stability-selection is not available
    StabilitySelection = None

from sklearn.preprocessing import (  # type: ignore
    MinMaxScaler,
@@ -16,25 +31,33 @@
from eli5.sklearn.utils import get_feature_names as _get_feature_names


def register_notnone(generic_func, cls):
    """
    Register an implementation of a generic function
    if the supplied type is not None.
    """
    def inner_register(func):
        if cls is None:
            # do nothing
            return func
        else:
            # register a new implementation
            return generic_func.register(cls)(func)
    return inner_register


# Feature selection:

@transform_feature_names.register(SelectorMixin)
@register_notnone(transform_feature_names, RandomizedLogisticRegression)
@register_notnone(transform_feature_names, RandomizedLasso)
@register_notnone(transform_feature_names, StabilitySelection)
def _select_names(est, in_names=None):
    mask = est.get_support(indices=False)
    in_names = _get_feature_names(est, feature_names=in_names,
                                  num_features=len(mask))
    return [in_names[i] for i in np.flatnonzero(mask)]

try:
    from sklearn.linear_model import (  # type: ignore
        RandomizedLogisticRegression,
        RandomizedLasso,
    )
    _select_names = transform_feature_names.register(RandomizedLasso)(_select_names)
    _select_names = transform_feature_names.register(RandomizedLogisticRegression)(_select_names)
except ImportError:  # Removed in scikit-learn 0.21
    pass


# Scaling

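As an aside, a self-contained sketch of the register_notnone pattern used above (OptionalSelector is a hypothetical stand-in for an optional dependency, not a real class): when the optional import failed and the class is None, the decorator leaves the generic function untouched; otherwise it registers the handler as usual.

# Standalone illustration of the register_notnone pattern.  Uses
# functools.singledispatch (Python 3.4+); eli5 itself relies on the
# singledispatch backport referenced in the docs.
from functools import singledispatch

@singledispatch
def describe(obj):
    return 'generic'

def register_notnone(generic_func, cls):
    def inner_register(func):
        if cls is None:
            return func  # dependency missing: nothing is registered
        return generic_func.register(cls)(func)
    return inner_register

OptionalSelector = None  # pretend `from somepkg import OptionalSelector` failed

@register_notnone(describe, OptionalSelector)
def _describe_selector(obj):
    return 'selector'

print(describe(object()))  # -> 'generic', since no implementation was registered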
62 changes: 54 additions & 8 deletions tests/test_sklearn_transform.py
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function
import re

import pytest
@@ -15,11 +17,21 @@
    RFECV,
    SelectFromModel,
)
from sklearn.linear_model import (
    LogisticRegression,
    RandomizedLogisticRegression,
    RandomizedLasso,  # TODO: add tests and document
)
from sklearn.linear_model import LogisticRegression
try:
    from sklearn.linear_model import (
        RandomizedLogisticRegression,
        RandomizedLasso,
    )
except ImportError:
    # randomized_l1 feature selectors are not available (removed in scikit-learn 0.21)
    RandomizedLogisticRegression = None
    RandomizedLasso = None
try:
    from stability_selection import StabilitySelection
except ImportError:
    # scikit-learn-contrib/stability-selection is not available
    StabilitySelection = None
from sklearn.preprocessing import (
    MinMaxScaler,
    StandardScaler,
@@ -28,6 +40,7 @@
)
from sklearn.pipeline import FeatureUnion, make_pipeline

from .utils import sklearn_version
from eli5 import transform_feature_names
from eli5.sklearn import PermutationImportance

@@ -47,6 +60,10 @@ def selection_score_func(X, y):
    return np.array([1, 2, 3, 4])


def instantiate_notnone(cls, *args, **kwargs):
    return cls(*args, **kwargs) if cls is not None else None


@pytest.mark.parametrize('transformer,expected', [
    (MyFeatureExtractor(), ['f1', 'f2', 'f3']),

@@ -88,8 +105,37 @@ def selection_score_func(X, y):
     ['<NAME1>', '<NAME3>']),
    (RFECV(LogisticRegression(random_state=42)),
     ['<NAME0>', '<NAME1>', '<NAME2>', '<NAME3>']),
    (RandomizedLogisticRegression(random_state=42),
     ['<NAME1>', '<NAME2>', '<NAME3>']),

    pytest.param(
        instantiate_notnone(RandomizedLogisticRegression, random_state=42),
        ['<NAME1>', '<NAME2>', '<NAME3>'],
        marks=pytest.mark.skipif(RandomizedLogisticRegression is None,
                                 reason='scikit-learn RandomizedLogisticRegression is not available')
    ),
    pytest.param(
        instantiate_notnone(RandomizedLasso, random_state=42),
        ['<NAME1>', '<NAME2>', '<NAME3>'],
        marks=[
            pytest.mark.skipif(RandomizedLasso is None,
                               reason='RandomizedLasso is not available'),
            pytest.mark.skipif(sklearn_version() < '0.19',
                               reason='scikit-learn < 0.19')]
    ),
    pytest.param(
        instantiate_notnone(RandomizedLasso, random_state=42),
        ['<NAME0>', '<NAME1>', '<NAME2>', '<NAME3>'],
        marks=[
            pytest.mark.skipif(RandomizedLasso is None,
                               reason='RandomizedLasso is not available'),
            pytest.mark.skipif('0.19' <= sklearn_version(),
                               reason='scikit-learn >= 0.19')]
    ),
    pytest.param(
        instantiate_notnone(StabilitySelection, random_state=42),
        ['<NAME2>'],
        marks=pytest.mark.skipif(StabilitySelection is None,
                                 reason='scikit-learn-contrib/stability-selection is not available')
    ),
])
def test_transform_feature_names_iris(transformer, expected, iris_train):
    X, y, _, _ = iris_train
@@ -102,4 +148,4 @@ def test_transform_feature_names_iris(transformer, expected, iris_train):
    # Test in_names being None
    expected_default_names = [re.sub('<NAME([0-9]+)>', r'x\1', name)
                              for name in expected]
    assert transform_feature_names(transformer, None) == expected_default_names
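The version guards above rely on a sklearn_version() helper imported from tests/utils, whose implementation is not shown in this diff. One plausible shape, offered only as an assumption (the real helper may differ), is:

# Hypothetical sketch of tests/utils.sklearn_version(); the actual helper may differ.
from distutils.version import LooseVersion

import sklearn

def sklearn_version():
    # A LooseVersion compares sensibly against version strings, so both
    # sklearn_version() < '0.19' and '0.19' <= sklearn_version() behave as expected.
    return LooseVersion(sklearn.__version__)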