Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sklearn 1.6 support #2221

Merged
merged 18 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 20 additions & 26 deletions .ci/pipeline/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,24 +116,21 @@ jobs:
timeoutInMinutes: 120
strategy:
matrix:
Python3.9_Sklearn1.0:
Alexsandruss marked this conversation as resolved.
Show resolved Hide resolved
Python3.9_Sklearn1.2:
PYTHON_VERSION: '3.9'
SKLEARN_VERSION: '1.0'
Python3.9_Sklearn1.1:
PYTHON_VERSION: '3.9'
SKLEARN_VERSION: '1.1'
Python3.10_Sklearn1.2:
PYTHON_VERSION: '3.10'
SKLEARN_VERSION: '1.2'
Python3.11_Sklearn1.3:
PYTHON_VERSION: '3.11'
Python3.10_Sklearn1.3:
PYTHON_VERSION: '3.10'
SKLEARN_VERSION: '1.3'
Python3.12_Sklearn1.4:
PYTHON_VERSION: '3.12'
Python3.11_Sklearn1.4:
PYTHON_VERSION: '3.11'
SKLEARN_VERSION: '1.4'
Python3.13_Sklearn1.5:
PYTHON_VERSION: '3.13'
Python3.12_Sklearn1.5:
PYTHON_VERSION: '3.12'
SKLEARN_VERSION: '1.5'
Python3.13_Sklearn1.6:
Alexsandruss marked this conversation as resolved.
Show resolved Hide resolved
PYTHON_VERSION: '3.13'
SKLEARN_VERSION: '1.6'
pool:
vmImage: 'ubuntu-22.04'
steps:
Expand All @@ -143,24 +140,21 @@ jobs:
timeoutInMinutes: 120
strategy:
matrix:
Python3.9_Sklearn1.0:
Python3.9_Sklearn1.2:
PYTHON_VERSION: '3.9'
SKLEARN_VERSION: '1.0'
Python3.9_Sklearn1.1:
PYTHON_VERSION: '3.9'
SKLEARN_VERSION: '1.1'
Python3.10_Sklearn1.2:
PYTHON_VERSION: '3.10'
SKLEARN_VERSION: '1.2'
Python3.11_Sklearn1.3:
PYTHON_VERSION: '3.11'
Python3.10_Sklearn1.3:
PYTHON_VERSION: '3.10'
SKLEARN_VERSION: '1.3'
Python3.12_Sklearn1.4:
PYTHON_VERSION: '3.12'
Python3.11_Sklearn1.4:
PYTHON_VERSION: '3.11'
SKLEARN_VERSION: '1.4'
Python3.13_Sklearn1.5:
PYTHON_VERSION: '3.13'
Python3.12_Sklearn1.5:
PYTHON_VERSION: '3.12'
SKLEARN_VERSION: '1.5'
Python3.13_Sklearn1.6:
PYTHON_VERSION: '3.13'
SKLEARN_VERSION: '1.6'
pool:
vmImage: 'windows-2022'
steps:
Expand Down
5 changes: 5 additions & 0 deletions .ci/scripts/run_sklearn_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import pytest
import sklearn

from daal4py.sklearn._utils import sklearn_check_version

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
Expand All @@ -43,6 +45,9 @@
if os.environ["SELECTED_TESTS"] == "all":
os.environ["SELECTED_TESTS"] = ""

if sklearn_check_version("1.6"):
os.environ["SCIPY_ARRAY_API"] = "1"

pytest_args = (
"--verbose --durations=100 --durations-min=0.01 "
f"--rootdir={sklearn_file_dir} "
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
[![PyPI Version](https://img.shields.io/pypi/v/scikit-learn-intelex)](https://pypi.org/project/scikit-learn-intelex/)
[![Conda Version](https://img.shields.io/conda/vn/conda-forge/scikit-learn-intelex)](https://anaconda.org/conda-forge/scikit-learn-intelex)
[![python version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)
[![scikit-learn supported versions](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5-blue)](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5-blue)
[![scikit-learn supported versions](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5%20%7C%201.6-blue)](https://img.shields.io/badge/sklearn-1.0%20%7C%201.2%20%7C%201.3%20%7C%201.4%20%7C%201.5%20%7C%201.6-blue)

---
</h3>
Expand Down
14 changes: 9 additions & 5 deletions daal4py/sklearn/ensemble/AdaBoostClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,19 @@
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

import daal4py as d4p
from daal4py.sklearn._utils import sklearn_check_version

from .._n_jobs_support import control_n_jobs
from .._utils import getFPType

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data


@control_n_jobs(decorated_methods=["fit", "predict"])
class AdaBoostClassifier(BaseEstimator, ClassifierMixin):
class AdaBoostClassifier(ClassifierMixin, BaseEstimator):
def __init__(
self,
split_criterion="gini",
Expand Down Expand Up @@ -89,7 +95,7 @@ def fit(self, X, y):
)

# Check that X and y have correct shape
X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])
X, y = check_X_y(X, y, y_numeric=False, dtype=[np.float64, np.float32])

check_classification_targets(y)

Expand Down Expand Up @@ -151,9 +157,7 @@ def predict(self, X):
check_is_fitted(self)

# Input validation
X = check_array(X, dtype=[np.single, np.double])
if X.shape[1] != self.n_features_in_:
raise ValueError("Shape of input is different from what was seen in `fit`")
X = validate_data(self, X, dtype=[np.float64, np.float32], reset=False)

# Trivial case
if self.n_classes_ == 1:
Expand Down
51 changes: 35 additions & 16 deletions daal4py/sklearn/ensemble/GBTDAAL.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

import daal4py as d4p
from daal4py.sklearn._utils import sklearn_check_version

from .._n_jobs_support import control_n_jobs
from .._utils import getFPType

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data


class GBTDAALBase(BaseEstimator, d4p.mb.GBTDAALBaseModel):
def __init__(
Expand Down Expand Up @@ -128,15 +134,22 @@ def _check_params(self):
def _more_tags(self):
return {"allow_nan": self.allow_nan_}

if sklearn_check_version("1.6"):

def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
tags.input_tags.allow_nan = self.allow_nan_
return tags


@control_n_jobs(decorated_methods=["fit", "predict"])
class GBTDAALClassifier(GBTDAALBase, ClassifierMixin):
class GBTDAALClassifier(ClassifierMixin, GBTDAALBase):
def fit(self, X, y):
# Check the algorithm parameters
self._check_params()

# Check that X and y have correct shape
X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])
X, y = check_X_y(X, y, y_numeric=False, dtype=[np.float64, np.float32])

check_classification_targets(y)

Expand Down Expand Up @@ -196,15 +209,18 @@ def fit(self, X, y):
def _predict(
self, X, resultsToEvaluate, pred_contribs=False, pred_interactions=False
):
# Input validation
if not self.allow_nan_:
X = check_array(X, dtype=[np.single, np.double])
else:
X = check_array(X, dtype=[np.single, np.double], force_all_finite="allow-nan")

# Check if fit has been called
check_is_fitted(self, ["n_features_in_", "n_classes_"])

# Input validation
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
force_all_finite="allow-nan" if self.allow_nan_ else True,
reset=False,
)

# Trivial case
if self.n_classes_ == 1:
return np.full(X.shape[0], self.classes_[0])
Expand Down Expand Up @@ -251,13 +267,13 @@ def convert_model(model):


@control_n_jobs(decorated_methods=["fit", "predict"])
class GBTDAALRegressor(GBTDAALBase, RegressorMixin):
class GBTDAALRegressor(RegressorMixin, GBTDAALBase):
def fit(self, X, y):
# Check the algorithm parameters
self._check_params()

# Check that X and y have correct shape
X, y = check_X_y(X, y, y_numeric=True, dtype=[np.single, np.double])
X, y = check_X_y(X, y, y_numeric=True, dtype=[np.float64, np.float32])

# Convert to 2d array
y_ = y.reshape((-1, 1))
Expand Down Expand Up @@ -297,15 +313,18 @@ def fit(self, X, y):
return self

def predict(self, X, pred_contribs=False, pred_interactions=False):
# Input validation
if not self.allow_nan_:
X = check_array(X, dtype=[np.single, np.double])
else:
X = check_array(X, dtype=[np.single, np.double], force_all_finite="allow-nan")

# Check if fit has been called
check_is_fitted(self, ["n_features_in_"])

# Input validation
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
force_all_finite="allow-nan" if self.allow_nan_ else True,
reset=False,
)

fptype = getFPType(X)
return self._predict_regression(X, fptype, pred_contribs, pred_interactions)

Expand Down
12 changes: 12 additions & 0 deletions daal4py/sklearn/linear_model/tests/test_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@
# limitations under the License.
# ==============================================================================


from os import environ

from daal4py.sklearn._utils import sklearn_check_version

# sklearn requires manual enabling of Scipy array API support
# if `array-api-compat` package is present in environment
# TODO: create generic approach to handle this for all tests

Check notice on line 24 in daal4py/sklearn/linear_model/tests/test_linear.py

View check run for this annotation

codefactor.io / CodeFactor

daal4py/sklearn/linear_model/tests/test_linear.py#L24

Unresolved comment '# TODO: create generic approach to handle this for all tests'. (C100)
if sklearn_check_version("1.6"):
environ["SCIPY_ARRAY_API"] = "1"


import numpy as np
import pytest
from sklearn.datasets import make_regression
Expand Down
101 changes: 91 additions & 10 deletions daal4py/sklearn/metrics/_pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@
from .._utils import PatchingConditionsChain, getFPType, sklearn_check_version

if sklearn_check_version("1.3"):
from sklearn.utils._param_validation import Integral, StrOptions, validate_params
from sklearn.utils._param_validation import (
Hidden,
Integral,
StrOptions,
validate_params,
)


def _daal4py_cosine_distance_dense(X):
Expand All @@ -65,91 +70,167 @@
return res.correlationDistance


def pairwise_distances(
def _pairwise_distances(
X, Y=None, metric="euclidean", *, n_jobs=None, force_all_finite=True, **kwds
):
if metric not in _VALID_METRICS and not callable(metric) and metric != "precomputed":
raise ValueError(
"Unknown metric %s. Valid metrics are %s, or 'precomputed', "
"or a callable" % (metric, _VALID_METRICS)
)

X = _daal_check_array(
X, accept_sparse=["csr", "csc", "coo"], force_all_finite=force_all_finite
)

_patching_status = PatchingConditionsChain("sklearn.metrics.pairwise_distances")
_dal_ready = _patching_status.and_conditions(
[
(
metric == "cosine" or metric == "correlation",
f"'{metric}' metric is not supported. "
"Only 'cosine' and 'correlation' metrics are supported.",
),
(Y is None, "Second feature array is not supported."),
(not issparse(X), "X is sparse. Sparse input is not supported."),
(
X.dtype == np.float64,
f"{X.dtype} X data type is not supported. Only np.float64 is supported.",
),
]
)
_patching_status.write_log()
if _dal_ready:
if metric == "cosine":
return _daal4py_cosine_distance_dense(X)
if metric == "correlation":
return _daal4py_correlation_distance_dense(X)
raise ValueError(f"'{metric}' distance is wrong for daal4py.")
if metric == "precomputed":
X, _ = check_pairwise_arrays(
X, Y, precomputed=True, force_all_finite=force_all_finite
)
whom = (
"`pairwise_distances`. Precomputed distance "
" need to have non-negative values."
)
check_non_negative(X, whom=whom)
return X
if metric in PAIRWISE_DISTANCE_FUNCTIONS:
func = PAIRWISE_DISTANCE_FUNCTIONS[metric]
elif callable(metric):
func = partial(
_pairwise_callable, metric=metric, force_all_finite=force_all_finite, **kwds
)
else:
if issparse(X) or issparse(Y):
raise TypeError("scipy distance metrics do not" " support sparse matrices.")

dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None

if dtype == bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)):
msg = "Data was converted to boolean for metric %s" % metric
warnings.warn(msg, DataConversionWarning)

X, Y = check_pairwise_arrays(X, Y, dtype=dtype, force_all_finite=force_all_finite)

# precompute data-derived metric params
params = _precompute_metric_params(X, Y, metric=metric, **kwds)
kwds.update(**params)

if effective_n_jobs(n_jobs) == 1 and X is Y:
return distance.squareform(distance.pdist(X, metric=metric, **kwds))
func = partial(distance.cdist, metric=metric, **kwds)

return _parallel_pairwise(X, Y, func, n_jobs, **kwds)

Check notice on line 145 in daal4py/sklearn/metrics/_pairwise.py

View check run for this annotation

codefactor.io / CodeFactor

daal4py/sklearn/metrics/_pairwise.py#L73-L145

Complex Method


# logic to deprecate `force_all_finite` from sklearn:
# it was renamed to `ensure_all_finite` since 1.6 and will be removed in 1.8
if sklearn_check_version("1.3"):
pairwise_distances_parameters = {
"X": ["array-like", "sparse matrix"],
"Y": ["array-like", "sparse matrix", None],
"metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable],
"n_jobs": [Integral, None],
"force_all_finite": [
"boolean",
StrOptions({"allow-nan"}),
Hidden(StrOptions({"deprecated"})),
],
"ensure_all_finite": [
"boolean",
StrOptions({"allow-nan"}),
Hidden(None),
],
}
if sklearn_check_version("1.6"):
if sklearn_check_version("1.8"):
del pairwise_distances_parameters["force_all_finite"]

def pairwise_distances(
    X,
    Y=None,
    metric="euclidean",
    *,
    n_jobs=None,
    ensure_all_finite=None,
    **kwds,
):
    # Wrapper used when only `ensure_all_finite` is accepted (the
    # `force_all_finite` entry has been dropped from the parameter
    # constraints for this branch). It forwards to `_pairwise_distances`,
    # which still names the option `force_all_finite`.
    #
    # `None` is only this wrapper's "not specified" placeholder. The
    # implementation's own default is `force_all_finite=True`, so restore
    # that default here instead of forwarding a literal `None`.
    # NOTE(review): assumes "unspecified" must mean `True`, matching the
    # default of `_pairwise_distances` -- confirm against the upstream
    # scikit-learn signature for this version.
    if ensure_all_finite is None:
        ensure_all_finite = True
    return _pairwise_distances(
        X,
        Y,
        metric,
        n_jobs=n_jobs,
        force_all_finite=ensure_all_finite,
        **kwds,
    )

else:
from sklearn.utils.deprecation import _deprecate_force_all_finite

def pairwise_distances(
X,
Y=None,
metric="euclidean",
*,
n_jobs=None,
force_all_finite="deprecated",
ensure_all_finite=None,
**kwds,
):
force_all_finite = _deprecate_force_all_finite(
force_all_finite, ensure_all_finite
)
return _pairwise_distances(
X, Y, metric, n_jobs=n_jobs, force_all_finite=force_all_finite, **kwds
)

else:
del pairwise_distances_parameters["ensure_all_finite"]

def pairwise_distances(
X,
Y=None,
metric="euclidean",
*,
n_jobs=None,
force_all_finite=True,
**kwds,
):
return _pairwise_distances(
X,
Y,
metric,
n_jobs=n_jobs,
force_all_finite=force_all_finite,
**kwds,
)

pairwise_distances = validate_params(
{
"X": ["array-like", "sparse matrix"],
"Y": ["array-like", "sparse matrix", None],
"metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable],
"n_jobs": [Integral, None],
"force_all_finite": ["boolean", StrOptions({"allow-nan"})],
},
pairwise_distances_parameters,
prefer_skip_nested_validation=True,
)(pairwise_distances)

else:
pairwise_distances = _pairwise_distances
pairwise_distances.__doc__ = pairwise_distances_original.__doc__
Loading
Loading