From 95bd1eabcca2e4d0565423be1dba3b58deaf8901 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Tue, 10 Dec 2024 08:45:19 +0100 Subject: [PATCH 1/7] [enhancement] add sklearnex version of ```validate_data```, ```_check_sample_weight``` (#2177) * add finiteness_checker pybind11 bindings * added finiteness checker * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Rename finiteness_checker.cpp to finiteness_checker.cpp * Update finiteness_checker.cpp * add next step * follow conventions * make xtable explicit * remove comment * Update validation.py * Update __init__.py * Update validation.py * Update __init__.py * Update __init__.py * Update validation.py * Update _data_conversion.py * Update _data_conversion.py * Update policy_common.cpp * Update policy_common.cpp * Update _policy.py * Update policy_common.cpp * Rename finiteness_checker.cpp to finiteness_checker.cpp * Create finiteness_checker.py * Update validation.py * Update __init__.py * attempt at fixing circular imports again * fix isort * remove __init__ changes * last move * Update policy_common.cpp * Update policy_common.cpp * Update policy_common.cpp * Update policy_common.cpp * Update validation.py * add testing * isort * attempt to fix module error * add fptype * fix typo * Update validation.py * remove sua_ifcae from to_table * isort and black * Update test_memory_usage.py * format * Update _data_conversion.py * Update _data_conversion.py * Update test_validation.py * remove unnecessary code * make reviewer changes * make dtype check change * add sparse testing * try again * try again * try again * temporary commit * first attempt * missing change? 
* modify DummyEstimator for testing * generalize DummyEstimator * switch test * further testing changes * add initial validate_data test, will be refactored * fixes for CI * Update validation.py * Update validation.py * Update test_memory_usage.py * Update base.py * Update base.py * improve tests * fix logic * fix logic * fix logic again * rename file * Revert "rename file" This reverts commit 8d47744f25c0b32e9b0ad639e772107710c56e98. * remove duplication * fix imports * Rename test_finite.py to test_validation.py * Revert "Rename test_finite.py to test_validation.py" This reverts commit ee799f60c000651eb828bd7586a91825706b644b. * updates * Update validation.py * fixes for some test failures * fix text * fixes for some failures * make consistent * fix bad logic * fix in string * attempt tp see if dataframe conversion is causing the issue * fix iter problem * fix testing issues * formatting * revert change * fixes for pandas * there is a slowdown with pandas that needs to be solved * swap to transpose for speed * more clarity * add _check_sample_weight * add more testing' * rename * remove unnecessary imports * fix test slowness * focus get_dataframes_and_queues * put config_context around * Update test_validation.py * Update base.py * Update test_validation.py * generalize regex * add fixes for sklearn 1.0 and input_name * fixes for test failures * Update validation.py * Update test_validation.py * Update validation.py * formattintg * make suggested changes * follow changes made in #2126 * fix future device problem * Update validation.py * minor changes based on #2206, suggestions * remove xp as keyword * only_non_negative -> ensure_non_negative * add commentary * formatting * address changes * Update test_validation.py * Update base.py * Update test_validation.py * Update sklearnex/utils/validation.py Co-authored-by: ethanglaser <42726565+ethanglaser@users.noreply.github.com> --------- Co-authored-by: ethanglaser <42726565+ethanglaser@users.noreply.github.com> --- 
sklearnex/tests/test_memory_usage.py | 45 +---- sklearnex/tests/utils/__init__.py | 2 + sklearnex/tests/utils/base.py | 44 +++++ sklearnex/utils/__init__.py | 4 +- sklearnex/utils/tests/test_finite.py | 89 --------- sklearnex/utils/tests/test_validation.py | 240 +++++++++++++++++++++++ sklearnex/utils/validation.py | 193 +++++++++++++++++- 7 files changed, 487 insertions(+), 130 deletions(-) delete mode 100644 sklearnex/utils/tests/test_finite.py create mode 100644 sklearnex/utils/tests/test_validation.py diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index aa92df1d6a..2d52a545cf 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -35,10 +35,14 @@ get_dataframes_and_queues, ) from onedal.tests.utils._device_selection import get_queues, is_dpctl_device_available -from onedal.utils._array_api import _get_sycl_namespace from onedal.utils._dpep_helpers import dpctl_available, dpnp_available from sklearnex import config_context -from sklearnex.tests.utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES +from sklearnex.tests.utils import ( + PATCHED_FUNCTIONS, + PATCHED_MODELS, + SPECIAL_INSTANCES, + DummyEstimator, +) from sklearnex.utils._array_api import get_namespace if dpctl_available: @@ -131,41 +135,6 @@ def gen_functions(functions): ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray} -if _is_dpc_backend: - - from sklearn.utils.validation import check_is_fitted - - from onedal.datatypes import from_table, to_table - - class DummyEstimatorWithTableConversions(BaseEstimator): - - def fit(self, X, y=None): - sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X) - y_table = to_table(y) - # The presence of the fitted attributes (ending with a trailing - # underscore) is required for the correct check. The cleanup of - # the memory will occur at the estimator instance deletion. 
- self.x_attr_ = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp - ) - self.y_attr_ = from_table( - y_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp - ) - return self - - def predict(self, X): - # Checks if the estimator is fitted by verifying the presence of - # fitted attributes (ending with a trailing underscore). - check_is_fitted(self) - sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X) - returned_X = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp - ) - return returned_X - - def gen_clsf_data(n_samples, n_features, dtype=None): data, label = make_classification( n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777 @@ -369,7 +338,7 @@ def test_table_conversions_memory_leaks(dataframe, queue, order, data_shape, dty pytest.skip("SYCL device memory leak check requires the level zero sysman") _kfold_function_template( - DummyEstimatorWithTableConversions, + DummyEstimator, dataframe, data_shape, queue, diff --git a/sklearnex/tests/utils/__init__.py b/sklearnex/tests/utils/__init__.py index 60ca67fa37..db728fe913 100644 --- a/sklearnex/tests/utils/__init__.py +++ b/sklearnex/tests/utils/__init__.py @@ -21,6 +21,7 @@ SPECIAL_INSTANCES, UNPATCHED_FUNCTIONS, UNPATCHED_MODELS, + DummyEstimator, _get_processor_info, call_method, gen_dataset, @@ -39,6 +40,7 @@ "gen_models_info", "gen_dataset", "sklearn_clone_dict", + "DummyEstimator", ] _IS_INTEL = "GenuineIntel" in _get_processor_info() diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 1949519585..33d3804b8f 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -32,8 +32,11 @@ ) from sklearn.datasets import load_diabetes, load_iris from sklearn.neighbors._base import KNeighborsMixin +from sklearn.utils.validation import check_is_fitted +from onedal.datatypes import from_table, to_table from onedal.tests.utils._dataframes_support import _convert_to_dataframe 
+from onedal.utils._array_api import _get_sycl_namespace from sklearnex import get_patch_map, patch_sklearn, sklearn_is_patched, unpatch_sklearn from sklearnex.basic_statistics import BasicStatistics, IncrementalBasicStatistics from sklearnex.linear_model import LogisticRegression @@ -369,3 +372,44 @@ def _get_processor_info(): ) return proc + + +class DummyEstimator(BaseEstimator): + + def fit(self, X, y=None): + sua_iface, xp, _ = _get_sycl_namespace(X) + X_table = to_table(X) + y_table = to_table(y) + # The presence of the fitted attributes (ending with a trailing + # underscore) is required for the correct check. The cleanup of + # the memory will occur at the estimator instance deletion. + if sua_iface: + self.x_attr_ = from_table( + X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp + ) + self.y_attr_ = from_table( + y_table, + sua_iface=sua_iface, + sycl_queue=X.sycl_queue if y is None else y.sycl_queue, + xp=xp, + ) + else: + self.x_attr = from_table(X_table) + self.y_attr = from_table(y_table) + + return self + + def predict(self, X): + # Checks if the estimator is fitted by verifying the presence of + # fitted attributes (ending with a trailing underscore). + check_is_fitted(self) + sua_iface, xp, _ = _get_sycl_namespace(X) + X_table = to_table(X) + if sua_iface: + returned_X = from_table( + X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp + ) + else: + returned_X = from_table(X_table) + + return returned_X diff --git a/sklearnex/utils/__init__.py b/sklearnex/utils/__init__.py index 4c3fe21154..686e089adf 100755 --- a/sklearnex/utils/__init__.py +++ b/sklearnex/utils/__init__.py @@ -14,6 +14,6 @@ # limitations under the License. 
# =============================================================================== -from .validation import _assert_all_finite +from .validation import assert_all_finite -__all__ = ["_assert_all_finite"] +__all__ = ["assert_all_finite"] diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py deleted file mode 100644 index 7d83667699..0000000000 --- a/sklearnex/utils/tests/test_finite.py +++ /dev/null @@ -1,89 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import time - -import numpy as np -import numpy.random as rand -import pytest -from numpy.testing import assert_raises - -from sklearnex.utils import _assert_all_finite - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize( - "shape", - [ - [16, 2048], - [ - 2**16 + 3, - ], - [1000, 1000], - ], -) -@pytest.mark.parametrize("allow_nan", [False, True]) -def test_sum_infinite_actually_finite(dtype, shape, allow_nan): - X = np.empty(shape, dtype=dtype) - X.fill(np.finfo(dtype).max) - _assert_all_finite(X, allow_nan=allow_nan) - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize( - "shape", - [ - [16, 2048], - [ - 65539, # 2**16 + 3, - ], - [1000, 1000], - ], -) -@pytest.mark.parametrize("allow_nan", [False, True]) -@pytest.mark.parametrize("check", ["inf", "NaN", None]) -@pytest.mark.parametrize("seed", [0, int(time.time())]) -def test_assert_finite_random_location(dtype, shape, allow_nan, check, seed): - rand.seed(seed) - X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) - - if check: - loc = rand.randint(0, X.size - 1) - X.reshape((-1,))[loc] = float(check) - - if check is None or (allow_nan and check == "NaN"): - _assert_all_finite(X, allow_nan=allow_nan) - else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("allow_nan", [False, True]) -@pytest.mark.parametrize("check", ["inf", "NaN", None]) -@pytest.mark.parametrize("seed", [0, int(time.time())]) -def test_assert_finite_random_shape_and_location(dtype, allow_nan, check, seed): - lb, ub = 32768, 1048576 # lb is a patching condition, ub 2^20 - rand.seed(seed) - X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) - - if check: - loc = rand.randint(0, X.size - 1) - X[loc] = float(check) - - if 
check is None or (allow_nan and check == "NaN"): - _assert_all_finite(X, allow_nan=allow_nan) - else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py new file mode 100644 index 0000000000..37d0a6df6e --- /dev/null +++ b/sklearnex/utils/tests/test_validation.py @@ -0,0 +1,240 @@ +# ============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import time + +import numpy as np +import numpy.random as rand +import pytest + +from daal4py.sklearn._utils import sklearn_check_version +from onedal.tests.utils._dataframes_support import ( + _convert_to_dataframe, + get_dataframes_and_queues, +) +from sklearnex import config_context +from sklearnex.tests.utils import DummyEstimator, gen_dataset +from sklearnex.utils.validation import _check_sample_weight, validate_data + +# array_api support starts in sklearn 1.2, and array_api_strict conformance starts in sklearn 1.3 +_dataframes_supported = ( + "numpy,pandas" + + (",dpctl" if sklearn_check_version("1.2") else "") + + (",array_api" if sklearn_check_version("1.3") else "") +) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "shape", + [ + [16, 2048], + [2**16 + 3], + [1000, 1000], + ], +) +@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) +def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): + est = DummyEstimator() + X = np.empty(shape, dtype=dtype) + X.fill(np.finfo(dtype).max) + X = np.atleast_2d(X) + X_array = validate_data(est, X, ensure_all_finite=ensure_all_finite) + assert type(X_array) == type(X) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "shape", + [ + [16, 2048], + [2**16 + 3], + [1000, 1000], + ], +) +@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", + get_dataframes_and_queues(_dataframes_supported), +) +def test_validate_data_random_location( + dataframe, queue, dtype, shape, ensure_all_finite, check, seed +): + est = DummyEstimator() + rand.seed(seed) + X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) + + if check: + loc = rand.randint(0, 
X.size - 1) + X.reshape((-1,))[loc] = float(check) + + # column heavy pandas inputs are very slow in sklearn's check_array even without + # the finite check, just transpose inputs to guarantee fast processing in tests + X = _convert_to_dataframe( + np.atleast_2d(X).T, + target_df=dataframe, + sycl_queue=queue, + ) + + dispatch = {} + if sklearn_check_version("1.2") and dataframe != "pandas": + dispatch["array_api_dispatch"] = True + + with config_context(**dispatch): + + allow_nan = ensure_all_finite == "allow-nan" + if check is None or (allow_nan and check == "NaN"): + validate_data(est, X, ensure_all_finite=ensure_all_finite) + else: + type_err = "infinity" if allow_nan else "[NaN|infinity]" + msg_err = f"Input X contains {type_err}" + with pytest.raises(ValueError, match=msg_err): + validate_data(est, X, ensure_all_finite=ensure_all_finite) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", + get_dataframes_and_queues(_dataframes_supported), +) +def test_validate_data_random_shape_and_location( + dataframe, queue, dtype, ensure_all_finite, check, seed +): + est = DummyEstimator() + lb, ub = 32768, 1048576 # lb is a patching condition, ub 2^20 + rand.seed(seed) + X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) + + if check: + loc = rand.randint(0, X.size - 1) + X[loc] = float(check) + + X = _convert_to_dataframe( + np.atleast_2d(X).T, + target_df=dataframe, + sycl_queue=queue, + ) + + dispatch = {} + if sklearn_check_version("1.2") and dataframe != "pandas": + dispatch["array_api_dispatch"] = True + + with config_context(**dispatch): + + allow_nan = ensure_all_finite == "allow-nan" + if check is None or (allow_nan and check == "NaN"): + validate_data(est, X, 
ensure_all_finite=ensure_all_finite) + else: + type_err = "infinity" if allow_nan else "[NaN|infinity]" + msg_err = f"Input X contains {type_err}." + with pytest.raises(ValueError, match=msg_err): + validate_data(est, X, ensure_all_finite=ensure_all_finite) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", + get_dataframes_and_queues(_dataframes_supported), +) +def test__check_sample_weight_random_shape_and_location( + dataframe, queue, dtype, check, seed +): + # This testing assumes that array api inputs to validate_data will only occur + # with sklearn array_api support which began in sklearn 1.2. This would assume + # that somewhere upstream of the validate_data call, a data conversion of dpnp, + # dpctl, or array_api inputs to numpy inputs would have occurred. + + lb, ub = 32768, 1048576 # lb is a patching condition, ub 2^20 + rand.seed(seed) + shape = (rand.randint(lb, ub), 2) + X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) + sample_weight = rand.uniform(high=np.finfo(dtype).max, size=shape[0]).astype(dtype) + + if check: + loc = rand.randint(0, shape[0] - 1) + sample_weight[loc] = float(check) + + X = _convert_to_dataframe( + X, + target_df=dataframe, + sycl_queue=queue, + ) + sample_weight = _convert_to_dataframe( + sample_weight, + target_df=dataframe, + sycl_queue=queue, + ) + + dispatch = {} + if sklearn_check_version("1.2") and dataframe != "pandas": + dispatch["array_api_dispatch"] = True + + with config_context(**dispatch): + + if check is None: + X_out = _check_sample_weight(sample_weight, X) + if dispatch: + assert type(X_out) == type(X) + else: + assert isinstance(X_out, np.ndarray) + else: + msg_err = "Input sample_weight contains [NaN|infinity]" + with pytest.raises(ValueError, match=msg_err): + X_out = _check_sample_weight(sample_weight, X) + + 
+@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "dataframe, queue", + get_dataframes_and_queues(_dataframes_supported), +) +def test_validate_data_output(dtype, dataframe, queue): + # This testing assumes that array api inputs to validate_data will only occur + # with sklearn array_api support which began in sklearn 1.2. This would assume + # that somewhere upstream of the validate_data call, a data conversion of dpnp, + # dpctl, or array_api inputs to numpy inputs would have occurred. + est = DummyEstimator() + X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] + + dispatch = {} + if sklearn_check_version("1.2") and dataframe != "pandas": + dispatch["array_api_dispatch"] = True + + with config_context(**dispatch): + X_out, y_out = validate_data(est, X, y) + # check sklearn validate_data operations work underneath + X_array = validate_data(est, X, reset=False) + + for orig, first, second in ((X, X_out, X_array), (y, y_out, None)): + if dispatch: + assert type(orig) == type( + first + ), f"validate_data converted {type(orig)} to {type(first)}" + if second is not None: + assert type(orig) == type( + second + ), f"from_array converted {type(orig)} to {type(second)}" + else: + # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays + assert isinstance(first, np.ndarray) + assert second is None or isinstance(second, np.ndarray) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index b2d1898643..4d12602d74 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -14,4 +14,195 @@ # limitations under the License. 
# =============================================================================== -from daal4py.sklearn.utils.validation import _assert_all_finite +import numbers + +import scipy.sparse as sp +from sklearn.utils.validation import _assert_all_finite as _sklearn_assert_all_finite +from sklearn.utils.validation import _num_samples, check_array, check_non_negative + +from daal4py.sklearn._utils import daal_check_version, sklearn_check_version + +from ._array_api import get_namespace + +if sklearn_check_version("1.6"): + from sklearn.utils.validation import validate_data as _sklearn_validate_data + + _finite_keyword = "ensure_all_finite" + +else: + from sklearn.base import BaseEstimator + + _sklearn_validate_data = BaseEstimator._validate_data + _finite_keyword = "force_all_finite" + + +if daal_check_version((2024, "P", 700)): + from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite + + def _onedal_supported_format(X, xp): + # array_api does not have a `strides` or `flags` attribute for testing memory + # order. When dlpack support is brought in for oneDAL, general support for + # array_api can be enabled and the hasattr check can be removed. + # _onedal_supported_format is therefore conservative in verifying attributes and + # does not support array_api. This will block onedal_assert_all_finite from being + # used for array_api inputs but will allow dpnp ndarrays and dpctl tensors. + # only check contiguous arrays to prevent unnecessary copying of data, even if + # non-contiguous arrays can now be converted to oneDAL tables. 
+ return ( + X.dtype in [xp.float32, xp.float64] + and hasattr(X, "flags") + and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]) + ) + +else: + from daal4py.utils.validation import _assert_all_finite as _onedal_assert_all_finite + from onedal.utils._array_api import _is_numpy_namespace + + def _onedal_supported_format(X, xp): + # daal4py _assert_all_finite only supports numpy namespaces, use internally- + # defined check to validate inputs, otherwise offload to sklearn + return X.dtype in [xp.float32, xp.float64] and _is_numpy_namespace(xp) + + +def _sklearnex_assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + # size check is an initial match to daal4py for performance reasons, can be + # optimized later + xp, _ = get_namespace(X) + if X.size < 32768 or not _onedal_supported_format(X, xp): + if sklearn_check_version("1.1"): + _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) + else: + _sklearn_assert_all_finite(X, allow_nan=allow_nan) + else: + _onedal_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _sklearnex_assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) + + +def validate_data( + _estimator, + /, + X="no_validation", + y="no_validation", + **kwargs, +): + # force finite check to not occur in sklearn, default is True + # `ensure_all_finite` is the most up-to-date keyword name in sklearn + # _finite_keyword provides backward compatability for `force_all_finite` + ensure_all_finite = kwargs.pop("ensure_all_finite", True) + kwargs[_finite_keyword] = False + + out = _sklearn_validate_data( + _estimator, + X=X, + y=y, + **kwargs, + ) + + check_x = not isinstance(X, str) or X != "no_validation" + check_y = not (y is None or isinstance(y, str) and y == "no_validation") + + if ensure_all_finite: + # run local finite check + allow_nan = ensure_all_finite == "allow-nan" + 
# the return object from validate_data can be a single + + # element (either x or y) or both (as a tuple). An iterator along with + # check_x and check_y can go through the output properly without + # stacking layers of if statements to make sure the proper input_name + # is used + arg = iter(out if isinstance(out, tuple) else (out,)) + if check_x: + assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") + if check_y: + assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") + + if check_y and "dtype" in kwargs: + # validate_data does not do full dtype conversions, as it uses check_X_y + # oneDAL can make tables from [int32, float32, float64], requiring + # a dtype check and conversion. This will query the array_namespace and + # convert y as necessary. This is important especially for regressors. + dtype = kwargs["dtype"] + if not isinstance(dtype, (tuple, list)): + dtype = tuple(dtype) + + outx, outy = out if check_x else (None, out) + if outy.dtype not in dtype: + yp, _ = get_namespace(outy) + # use asarray rather than astype because of numpy support + outy = yp.asarray(outy, dtype=dtype[0]) + out = (outx, outy) if check_x else outy + + return out + + +def _check_sample_weight( + sample_weight, X, dtype=None, copy=False, ensure_non_negative=False +): + + n_samples = _num_samples(X) + xp, _ = get_namespace(X) + + if dtype is not None and dtype not in [xp.float32, xp.float64]: + dtype = xp.float64 + + if sample_weight is None: + if hasattr(X, "device"): + sample_weight = xp.ones(n_samples, dtype=dtype, device=X.device) + else: + sample_weight = xp.ones(n_samples, dtype=dtype) + elif isinstance(sample_weight, numbers.Number): + if hasattr(X, "device"): + sample_weight = xp.full( + n_samples, sample_weight, dtype=dtype, device=X.device + ) + else: + sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) + else: + if dtype is None: + dtype = [xp.float64, xp.float32] + + params = { + "accept_sparse": False, + "ensure_2d": False, + "dtype": 
dtype, + "order": "C", + "copy": copy, + _finite_keyword: False, + } + if sklearn_check_version("1.1"): + params["input_name"] = "sample_weight" + + sample_weight = check_array(sample_weight, **params) + assert_all_finite(sample_weight, input_name="sample_weight") + + if sample_weight.ndim != 1: + raise ValueError("Sample weights must be 1D array or scalar") + + if sample_weight.shape != (n_samples,): + raise ValueError( + "sample_weight.shape == {}, expected {}!".format( + sample_weight.shape, (n_samples,) + ) + ) + + if ensure_non_negative: + check_non_negative(sample_weight, "`sample_weight`") + + return sample_weight From e1e8b0f95179ffbda0d2f75f961506fe6cd877fc Mon Sep 17 00:00:00 2001 From: david-cortes-intel Date: Tue, 10 Dec 2024 10:46:00 +0100 Subject: [PATCH 2/7] clear some warnings for daal4py doc builds (#2205) --- .gitignore | 1 + doc/daal4py/algorithms.rst | 7 +++++++ doc/daal4py/conf.py | 7 ++++--- doc/daal4py/index.rst | 2 ++ doc/daal4py/model-builders.rst | 5 +++-- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index f40d99729a..d1a918b47f 100755 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ daal4py.egg-info **/.ipynb_checkpoints doc/_build doc/sources/samples/*.ipynb +doc/daal4py/_build/ *.obj *.pyd build_oneapi_backend diff --git a/doc/daal4py/algorithms.rst b/doc/daal4py/algorithms.rst index 3c2451d2ca..735e29a3c7 100755 --- a/doc/daal4py/algorithms.rst +++ b/doc/daal4py/algorithms.rst @@ -530,6 +530,7 @@ Mean Squared Error Algorithm (MSE) Parameters and semantics are described in |onedal-dg-mse|_. .. rubric:: Examples: + - `In Adagrad `__ - `In LBFGS `__ - `In SGD `__ @@ -544,6 +545,7 @@ Logistic Loss Parameters and semantics are described in |onedal-dg-logistic-loss|_. .. rubric:: Examples: + - `In SGD `__ .. autoclass:: daal4py.optimization_solver_logistic_loss @@ -556,6 +558,7 @@ Cross-entropy Loss Parameters and semantics are described in |onedal-dg-cross-entropy-loss|_. .. 
rubric:: Examples: + - `In LBFGS `__ .. autoclass:: daal4py.optimization_solver_cross_entropy_loss @@ -570,6 +573,7 @@ Stochastic Gradient Descent Algorithm Parameters and semantics are described in |onedal-dg-sgd|_. .. rubric:: Examples: + - `Using Logistic Loss `__ - `Using MSE `__ @@ -583,6 +587,7 @@ Limited-Memory Broyden-Fletcher-Goldfarb-Shanno Algorithm Parameters and semantics are described in |onedal-dg-lbfgs|_. .. rubric:: Examples: + - `Using MSE `__ .. autoclass:: daal4py.optimization_solver_lbfgs @@ -595,6 +600,7 @@ Adaptive Subgradient Method Parameters and semantics are described in |onedal-dg-adagrad|_. .. rubric:: Examples: + - `Using MSE `__ .. autoclass:: daal4py.optimization_solver_adagrad @@ -607,6 +613,7 @@ Stochastic Average Gradient Descent Parameters and semantics are described in |onedal-dg-stochastic-average-gradient-descent-saga|_. .. rubric:: Examples: + - `Single Proces saga-logistc_loss `__ .. autoclass:: daal4py.optimization_solver_saga diff --git a/doc/daal4py/conf.py b/doc/daal4py/conf.py index b850d1ae50..e93b54074d 100644 --- a/doc/daal4py/conf.py +++ b/doc/daal4py/conf.py @@ -29,9 +29,10 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- diff --git a/doc/daal4py/index.rst b/doc/daal4py/index.rst index 4af3d38aea..f862d29009 100644 --- a/doc/daal4py/index.rst +++ b/doc/daal4py/index.rst @@ -195,6 +195,8 @@ Scikit-Learn API and patching .. tip:: We recommend using the 'scikit-learn-intelex package patching ' _ for the scikit-learn patching. + + daal4py exposes some oneDAL solvers using a scikit-learn compatible API. 
daal4py can furthermore monkey-patch the ``sklearn`` package to use the DAAL diff --git a/doc/daal4py/model-builders.rst b/doc/daal4py/model-builders.rst index 86e4ca7182..55a307ee76 100644 --- a/doc/daal4py/model-builders.rst +++ b/doc/daal4py/model-builders.rst @@ -16,9 +16,9 @@ .. _model-builders: -############################################### +################################################### Model Builders for the Gradient Boosting Frameworks -############################################### +################################################### .. include:: note.rst @@ -92,6 +92,7 @@ The returned prediction has the shape: * ``(n_rows, n_features + 1)`` for SHAP contributions * ``(n_rows, n_features + 1, n_features + 1)`` for SHAP interactions + Here, ``n_rows`` is the number of rows (i.e., observations) in ``test_data``, and ``n_features`` is the number of features in the dataset. From 0b74e742d654392754d502f1d33c73cdafaa9a6a Mon Sep 17 00:00:00 2001 From: Aleksei Khomenko Date: Tue, 10 Dec 2024 16:16:04 +0100 Subject: [PATCH 3/7] CI: add `skywalking-eyes` license header check (#2215) --- .ci/scripts/install_windows.bat | 11 -- .github/.licenserc.yaml | 68 ++++++++++ .github/ISSUE_TEMPLATE/-bug_report.md | 16 +++ .github/Pull_Request_template.md | 16 +++ .github/issue_template/Bug_report.md | 16 +++ .github/workflows/renovate-validation.yml | 16 +++ .github/workflows/skywalking-eyes.yml | 45 +++++++ CODE_OF_CONDUCT.md | 16 +++ README.md | 26 +++- daal4py/README.md | 16 +++ doc/daal4py/algorithms.rst | 28 ++-- doc/daal4py/contents.rst | 30 ++--- doc/daal4py/data.rst | 30 ++--- doc/daal4py/examples.rst | 30 ++--- doc/daal4py/index.rst | 28 ++-- doc/daal4py/model-builders.rst | 32 +++-- doc/daal4py/note.rst | 30 ++--- doc/daal4py/scaling.rst | 28 ++-- doc/daal4py/sklearn.rst | 28 ++-- doc/daal4py/streaming.rst | 30 ++--- doc/sources/404.rst | 28 ++-- doc/sources/algorithms.rst | 48 ++++--- doc/sources/array_api.rst | 28 ++-- doc/sources/blogs.rst | 28 ++-- 
doc/sources/contribute.rst | 32 +++-- doc/sources/deprecation.rst | 34 +++-- doc/sources/distributed-mode.rst | 52 ++++---- doc/sources/guide/acceleration.rst | 28 ++-- doc/sources/index.rst | 42 +++--- doc/sources/input-types.rst | 28 ++-- doc/sources/kaggle.rst | 28 ++-- doc/sources/kaggle/automl.rst | 32 +++-- doc/sources/kaggle/classification.rst | 112 ++++++++-------- doc/sources/kaggle/note-about-tps.rst | 28 ++-- doc/sources/kaggle/regression.rst | 54 ++++---- doc/sources/license.rst | 28 ++-- doc/sources/non-scikit-algorithms.rst | 30 ++--- doc/sources/oneapi-gpu.rst | 34 +++-- doc/sources/patching/patch-kmeans-example.rst | 28 ++-- doc/sources/patching/patch-one-algorithm.rst | 28 ++-- .../patching/patch-several-algorithms.rst | 28 ++-- doc/sources/patching/patching-options.rst | 28 ++-- doc/sources/patching/undo-patch.rst | 28 ++-- doc/sources/preview.rst | 28 ++-- doc/sources/quick-start.rst | 120 +++++++++--------- doc/sources/samples.rst | 30 ++--- doc/sources/support.rst | 34 +++-- doc/sources/tutorials.rst | 40 +++--- doc/sources/verbose.rst | 28 ++-- examples/notebooks/README.md | 26 +++- 50 files changed, 918 insertions(+), 762 deletions(-) delete mode 100644 .ci/scripts/install_windows.bat create mode 100644 .github/.licenserc.yaml create mode 100644 .github/workflows/skywalking-eyes.yml diff --git a/.ci/scripts/install_windows.bat b/.ci/scripts/install_windows.bat deleted file mode 100644 index f7c19d77f3..0000000000 --- a/.ci/scripts/install_windows.bat +++ /dev/null @@ -1,11 +0,0 @@ -REM SPDX-FileCopyrightText: 2020 Intel Corporation -REM -REM SPDX-License-Identifier: MIT - -set URL=%1 -set COMPONENTS=%2 - -curl.exe --output webimage.exe --url %URL% --retry 5 --retry-delay 5 -start /b /wait webimage.exe -s -x -f webimage_extracted -del webimage.exe -webimage_extracted\bootstrapper.exe -s --action install --components=%COMPONENTS% --eula=accept --continue-with-optional-error=yes -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 diff 
--git a/.github/.licenserc.yaml b/.github/.licenserc.yaml new file mode 100644 index 0000000000..20ad7f595b --- /dev/null +++ b/.github/.licenserc.yaml @@ -0,0 +1,68 @@ +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header: + license: + spdx-id: Apache-2.0 + copyright-owner: contributors to the oneDAL project + pattern: | + (Copyright \d{4} Intel Corporation|Copyright contributors to the oneDAL project|Copyright \d{4} Fujitsu Limited) + + Licensed under the Apache License, Version 2\.0 \(the "License"\); + you may not use this file except in compliance with the License\. + You may obtain a copy of the License at + + http:\/\/www\.apache\.org\/licenses\/LICENSE-2\.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied\. + See the License for the specific language governing permissions and + limitations under the License\. 
+ paths: + - '**' + + paths-ignore: + - '.gitignore' + - '.gitattributes' + # Empty __init__.py files + - 'daal4py/sklearn/monkeypatch/__init__.py' + - 'generator/__init__.py' + # Clang-format configs + - '.clang-format' + - '_clang-format' + # All .csv and .ipynb files + - '**/*.csv' + - '**/*.ipynb' + # Something in doc/ + - 'doc/daal4py/_static/style.css' + - 'doc/daal4py/_templates/layout.html' + - 'doc/sources/_static/style.css' + - 'doc/sources/_templates/footer.html' + - 'doc/sources/_templates/layout.html' + - 'doc/daal4py/third-party-programs.txt' + - 'doc/third-party-programs-sklearnex.txt' + # requirements + - 'dependencies-dev' + - 'requirements*.txt' + # Some files from .ci/.github + - '.github/CODEOWNERS' + - '.github/renovate.json' + # Specific files + - 'setup.cfg' + - 'LICENSE' + + comment: never + + license-location-threshold: 80 # specifies the index threshold where the license header can be located. diff --git a/.github/ISSUE_TEMPLATE/-bug_report.md b/.github/ISSUE_TEMPLATE/-bug_report.md index d008668bb8..af95855314 100644 --- a/.github/ISSUE_TEMPLATE/-bug_report.md +++ b/.github/ISSUE_TEMPLATE/-bug_report.md @@ -1,3 +1,19 @@ + + --- name: " Bug_report" about: Create a report to help us improve diff --git a/.github/Pull_Request_template.md b/.github/Pull_Request_template.md index c79b8b8fa0..c546e5e0d6 100644 --- a/.github/Pull_Request_template.md +++ b/.github/Pull_Request_template.md @@ -1,3 +1,19 @@ + + ## Description _Add a comprehensive description of proposed changes_ diff --git a/.github/issue_template/Bug_report.md b/.github/issue_template/Bug_report.md index 61be954108..457ef6db73 100644 --- a/.github/issue_template/Bug_report.md +++ b/.github/issue_template/Bug_report.md @@ -1,3 +1,19 @@ + + --- name: Bug report about: Create a report to help us improve diff --git a/.github/workflows/renovate-validation.yml b/.github/workflows/renovate-validation.yml index 0a4d12fb94..0916ce1bc6 100644 --- a/.github/workflows/renovate-validation.yml 
+++ b/.github/workflows/renovate-validation.yml @@ -1,3 +1,19 @@ +#=============================================================================== +# Copyright 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + name: renovate-validation on: diff --git a/.github/workflows/skywalking-eyes.yml b/.github/workflows/skywalking-eyes.yml new file mode 100644 index 0000000000..218176b492 --- /dev/null +++ b/.github/workflows/skywalking-eyes.yml @@ -0,0 +1,45 @@ +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Check License Header + +on: + pull_request: + branches: + - main + - rls/* + push: + branches: + - main + - rls/* + +permissions: read-all + +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref_name }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + check-license-header: + name: Copyright Check + if: github.event.repository.fork == false + runs-on: ubuntu-24.04 + steps: + - name: "Checkout code" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: "Run check" + uses: apache/skywalking-eyes/header@cd7b195c51fd3d6ad52afceb760719ddc6b3ee91 # v0.6.0 + with: + config: ".github/.licenserc.yaml" + mode: "check" diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 6ffa9221d8..1ad20877e1 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,3 +1,19 @@ + + # Contributor Covenant Code of Conduct ## Our Pledge diff --git a/README.md b/README.md index 717a01a6e7..114a943a4c 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,23 @@ + +
-# Intel(R) Extension for Scikit-learn* +# Intel(R) Extension for Scikit-learn*

Speed up your scikit-learn applications for Intel(R) CPUs and GPUs across single- and multi-node configurations @@ -35,7 +51,7 @@ With Intel(R) Extension for Scikit-learn, you can: * Continue to use the open-source scikit-learn API * Enable and disable the extension with a couple of lines of code or at the command line -Intel(R) Extension for Scikit-learn is also a part of [Intel(R) AI Tools](https://www.intel.com/content/www/us/en/developer/tools/oneapi/ai-analytics-toolkit.html). +Intel(R) Extension for Scikit-learn is also a part of [Intel(R) AI Tools](https://www.intel.com/content/www/us/en/developer/tools/oneapi/ai-analytics-toolkit.html). ## Acceleration @@ -91,7 +107,7 @@ See all installation instructions in the [Installation Guide](https://uxlfoundat The software acceleration is achieved through patching. It means, replacing the stock scikit-learn algorithms with their optimized versions provided by the extension. The patching only affects [supported algorithms and their parameters](https://uxlfoundation.github.io/scikit-learn-intelex/latest/algorithms.html). -You can still use not supported ones in your code, the package simply fallbacks into the stock version of scikit-learn. +You can still use not supported ones in your code, the package simply fallbacks into the stock version of scikit-learn. > **_TIP:_** Enable [verbose mode](https://uxlfoundation.github.io/scikit-learn-intelex/latest/verbose.html) to see which implementation of the algorithm is currently used. @@ -106,7 +122,7 @@ To patch scikit-learn, you can: patch_sklearn() ``` -:eyes: Read about [other ways to patch scikit-learn](https://uxlfoundation.github.io/scikit-learn-intelex/index.html#usage). +:eyes: Read about [other ways to patch scikit-learn](https://uxlfoundation.github.io/scikit-learn-intelex/index.html#usage). 
## Documentation @@ -134,5 +150,5 @@ The acceleration is achieved through the use of the Intel(R) oneAPI Data Analyti We welcome community contributions, check our [Contributing Guidelines](https://github.com/uxlfoundation/scikit-learn-intelex/blob/master/CONTRIBUTING.md) to learn more. ------------------------------------------------------------------------ -\* The Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. +\* The Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. diff --git a/daal4py/README.md b/daal4py/README.md index 8d7e288df1..477260336b 100755 --- a/daal4py/README.md +++ b/daal4py/README.md @@ -1,3 +1,19 @@ + + # daal4py - A Convenient Python API to the Intel(R) oneAPI Data Analytics Library [![Build Status](https://dev.azure.com/daal/daal4py/_apis/build/status/CI?branchName=main)](https://dev.azure.com/daal/daal4py/_build/latest?definitionId=9&branchName=main) [![Coverity Scan Build Status](https://scan.coverity.com/projects/21716/badge.svg)](https://scan.coverity.com/projects/daal4py) diff --git a/doc/daal4py/algorithms.rst b/doc/daal4py/algorithms.rst index 735e29a3c7..d93d25b9cc 100755 --- a/doc/daal4py/algorithms.rst +++ b/doc/daal4py/algorithms.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. 
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. ########## Algorithms diff --git a/doc/daal4py/contents.rst b/doc/daal4py/contents.rst index c98075f365..c862c4ac2c 100644 --- a/doc/daal4py/contents.rst +++ b/doc/daal4py/contents.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. 
you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _contents: @@ -21,7 +19,7 @@ Contents ######## .. include:: note.rst - + .. toctree:: :maxdepth: 2 :caption: Contents: diff --git a/doc/daal4py/data.rst b/doc/daal4py/data.rst index 28c5473349..68232acb79 100644 --- a/doc/daal4py/data.rst +++ b/doc/daal4py/data.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _data: @@ -21,7 +19,7 @@ Input Data ########## .. include:: note.rst - + All array arguments to compute functions and to algorithm constructors can be provided in different formats. daal4py will automatically do its best to work on the provided data with minimal overhead, most notably without copying the data. diff --git a/doc/daal4py/examples.rst b/doc/daal4py/examples.rst index a4be55d9b4..b45a3cffc0 100755 --- a/doc/daal4py/examples.rst +++ b/doc/daal4py/examples.rst @@ -1,25 +1,23 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. 
See the License for the specific language governing permissions and +.. limitations under the License. ########## Examples ########## .. include:: note.rst - + Below are examples on how to utilize daal4py for various usage styles. General usage diff --git a/doc/daal4py/index.rst b/doc/daal4py/index.rst index f862d29009..4228e11db9 100644 --- a/doc/daal4py/index.rst +++ b/doc/daal4py/index.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. 
_index: diff --git a/doc/daal4py/model-builders.rst b/doc/daal4py/model-builders.rst index 55a307ee76..644fdb34e3 100644 --- a/doc/daal4py/model-builders.rst +++ b/doc/daal4py/model-builders.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2023 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2023 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _model-builders: @@ -39,7 +37,7 @@ for the best performance on the Intel(R) Architecture. .. note:: Currently, experimental support for XGBoost* and LightGBM* categorical data is not supported. 
- For the model conversion to work with daal4py, convert non-numeric data to numeric data + For the model conversion to work with daal4py, convert non-numeric data to numeric data before training and converting the model. Conversion @@ -90,7 +88,7 @@ For these models, the ``predict()`` method takes additional keyword arguments: The returned prediction has the shape: - * ``(n_rows, n_features + 1)`` for SHAP contributions + * ``(n_rows, n_features + 1)`` for SHAP contributions * ``(n_rows, n_features + 1, n_features + 1)`` for SHAP interactions Here, ``n_rows`` is the number of rows (i.e., observations) in diff --git a/doc/daal4py/note.rst b/doc/daal4py/note.rst index c7fe1b09e3..7ea559a381 100644 --- a/doc/daal4py/note.rst +++ b/doc/daal4py/note.rst @@ -1,20 +1,18 @@ -.. ****************************************************************************** -.. * Copyright 2023 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2023 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _note: -.. note:: Scikit-learn patching functionality in daal4py was deprecated and moved to a separate package, `Intel(R) Extension for Scikit-learn* `_. +.. note:: Scikit-learn patching functionality in daal4py was deprecated and moved to a separate package, `Intel(R) Extension for Scikit-learn* `_. All future patches will be available only in Intel(R) Extension for Scikit-learn*. Use the scikit-learn-intelex package instead of daal4py for the scikit-learn acceleration. diff --git a/doc/daal4py/scaling.rst b/doc/daal4py/scaling.rst index ba21cb5c85..09b766a06a 100644 --- a/doc/daal4py/scaling.rst +++ b/doc/daal4py/scaling.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. 
distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _distributed: diff --git a/doc/daal4py/sklearn.rst b/doc/daal4py/sklearn.rst index 478885bae9..c723ef489c 100755 --- a/doc/daal4py/sklearn.rst +++ b/doc/daal4py/sklearn.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. 
_sklearn: diff --git a/doc/daal4py/streaming.rst b/doc/daal4py/streaming.rst index 6c8e97dc2e..246d4709a9 100644 --- a/doc/daal4py/streaming.rst +++ b/doc/daal4py/streaming.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _streaming: @@ -21,7 +19,7 @@ Streaming Data ############## .. include:: note.rst - + For large quantities of data it might be impossible to provide all input data at once. This might be because the data resides in multiple files and merging it is to costly (or not feasible in other ways). 
In other cases the data is simply too diff --git a/doc/sources/404.rst b/doc/sources/404.rst index 9532f8db9b..853c67f405 100644 --- a/doc/sources/404.rst +++ b/doc/sources/404.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2022 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2022 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. :orphan: :nosearch: diff --git a/doc/sources/algorithms.rst b/doc/sources/algorithms.rst index 473255c0c2..611e157156 100755 --- a/doc/sources/algorithms.rst +++ b/doc/sources/algorithms.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. 
* Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. 
_sklearn_algorithms: @@ -50,12 +48,12 @@ Classification - ``criterion`` != `'gini'` - Multi-output and sparse data are not supported * - `KNeighborsClassifier` - - + - - For ``algorithm`` == `'kd_tree'`: - + all parameters except ``metric`` != `'euclidean'` or `'minkowski'` with ``p`` != `2` - For ``algorithm`` == `'brute'`: - + all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] - Multi-output and sparse data are not supported * - `LogisticRegression` @@ -182,12 +180,12 @@ Nearest Neighbors - Parameters - Data formats * - `NearestNeighbors` - - + - - For ``algorithm`` == 'kd_tree': - + all parameters except ``metric`` != `'euclidean'` or `'minkowski'` with ``p`` != `2` - For ``algorithm`` == 'brute': - + all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] - Sparse data is not supported @@ -213,12 +211,12 @@ Other Tasks - Only dense data is supported * - `pairwise_distance` - All parameters are supported except: - + - ``metric`` not in [`'cosine'`, `'correlation'`] - Only dense data is supported * - `roc_auc_score` - All parameters are supported except: - + - ``average`` != `None` - ``sample_weight`` != `None` - ``max_fpr`` != `None` @@ -344,7 +342,7 @@ Dimensionality Reduction - Data formats * - `PCA` - All parameters are supported except: - + - ``svd_solver`` not in [`'full'`, `'covariance_eigh'`] - Sparse data is not supported @@ -496,7 +494,7 @@ Dimensionality Reduction - Data formats * - `PCA` - All parameters are supported except: - + - ``svd_solver`` not in [`'full'`, `'covariance_eigh'`] - ``fit`` is the only method supported - Sparse data is not supported diff --git a/doc/sources/array_api.rst b/doc/sources/array_api.rst index 4c71e0cb88..25344bf4fe 100644 --- a/doc/sources/array_api.rst +++ b/doc/sources/array_api.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. 
* Copyright 2024 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2024 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _array_api: diff --git a/doc/sources/blogs.rst b/doc/sources/blogs.rst index 2d9b0024e5..55b330637f 100644 --- a/doc/sources/blogs.rst +++ b/doc/sources/blogs.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. 
* distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _blogs: diff --git a/doc/sources/contribute.rst b/doc/sources/contribute.rst index ff4cb901e9..81d053b715 100644 --- a/doc/sources/contribute.rst +++ b/doc/sources/contribute.rst @@ -1,24 +1,22 @@ -.. ****************************************************************************** -.. * Copyright 2022 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2022 Intel Corporation +.. +.. 
Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. ################## How to Contribute ################## -As an open source project, we welcome community contributions to Intel(R) Extension for Scikit-learn. +As an open source project, we welcome community contributions to Intel(R) Extension for Scikit-learn. This document explains how to participate in project conversations, log bugs and enhancement requests, and submit code patches. Licensing @@ -49,7 +47,7 @@ Continuous Integration (CI) testing is enabled for the repository. Your pull req Code Style ------------------- -We use `black `_ and `isort `_ formatters for Python* code. +We use `black `_ and `isort `_ formatters for Python* code. The line length is 90 characters; use default options otherwise. You can find the linter configuration in `.pyproject.toml `_. A GitHub* Action verifies if your changes comply with the output of the auto-formatting tools. diff --git a/doc/sources/deprecation.rst b/doc/sources/deprecation.rst index 5bc3f3fd19..2ae9305740 100644 --- a/doc/sources/deprecation.rst +++ b/doc/sources/deprecation.rst @@ -1,30 +1,28 @@ -.. ****************************************************************************** -.. * Copyright 2023 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. 
* -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2023 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. Deprecation Notice ================== -This page provides information about the deprecations of a specific |intelex| functionality. +This page provides information about the deprecations of a specific |intelex| functionality. macOS* Support ************** -**Deprecation:** macOS* support is deprecated for |intelex|. The 2023.x releases are the last to provide it. +**Deprecation:** macOS* support is deprecated for |intelex|. The 2023.x releases are the last to provide it. **Reasons for deprecation:** No modern X86 macOS*-based systems are to be released. -**Alternatives:** The 2023.x version on macOS*. +**Alternatives:** The 2023.x version on macOS*. diff --git a/doc/sources/distributed-mode.rst b/doc/sources/distributed-mode.rst index fa652a4c2b..c78a50d9e0 100644 --- a/doc/sources/distributed-mode.rst +++ b/doc/sources/distributed-mode.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. 
* Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _distributed: @@ -20,33 +18,33 @@ Distributed Mode ================ |intelex| offers Single Program, Multiple Data (SPMD) supported interfaces for distributed computing. -Several `GPU-supported algorithms `_ -also provide distributed, multi-GPU computing capabilities via integration with ``mpi4py``. The prerequisites -match those of GPU computing, along with an MPI backend of your choice (`Intel MPI recommended -`_, available -via ``impi-devel`` python package) and the ``mpi4py`` python package. If using |intelex| +Several `GPU-supported algorithms `_ +also provide distributed, multi-GPU computing capabilities via integration with ``mpi4py``. 
The prerequisites +match those of GPU computing, along with an MPI backend of your choice (`Intel MPI recommended +`_, available +via ``impi-devel`` python package) and the ``mpi4py`` python package. If using |intelex| `installed from sources `_, ensure that the spmd_backend is built. -Note that |intelex| now supports GPU offloading to speed up MPI operations. This is supported automatically with +Note that |intelex| now supports GPU offloading to speed up MPI operations. This is supported automatically with some MPI backends, but in order to use GPU offloading with Intel MPI, set the following environment variable (providing data on device without this may lead to a runtime error): - + :: export I_MPI_OFFLOAD=1 -Estimators can be imported from the ``sklearnex.spmd`` module. Data should be distributed across multiple nodes as -desired, and should be transfered to a dpctl or dpnp array before being passed to the estimator. View a full -example of this process in the |intelex| repository, where many examples of our SPMD-supported estimators are +Estimators can be imported from the ``sklearnex.spmd`` module. Data should be distributed across multiple nodes as +desired, and should be transfered to a dpctl or dpnp array before being passed to the estimator. View a full +example of this process in the |intelex| repository, where many examples of our SPMD-supported estimators are available: https://github.com/uxlfoundation/scikit-learn-intelex/blob/main/examples/sklearnex/. To run: :: mpirun -n 4 python linear_regression_spmd.py -Note that additional mpirun arguments can be added as desired. SPMD-supported estimators are listed in the +Note that additional mpirun arguments can be added as desired. SPMD-supported estimators are listed in the `algorithms support documentation `_. -Additionally, daal4py offers some distributed functionality, see +Additionally, daal4py offers some distributed functionality, see `documentation `_ for further details. 
diff --git a/doc/sources/guide/acceleration.rst b/doc/sources/guide/acceleration.rst index 952ee73539..3b7b662080 100644 --- a/doc/sources/guide/acceleration.rst +++ b/doc/sources/guide/acceleration.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2022 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2022 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. ######################## Tuning Guide diff --git a/doc/sources/index.rst b/doc/sources/index.rst index cc7becdb8e..627692b118 100755 --- a/doc/sources/index.rst +++ b/doc/sources/index.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. 
* Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. |intelex_repo| replace:: |intelex| repository .. _intelex_repo: https://github.com/uxlfoundation/scikit-learn-intelex @@ -43,13 +41,13 @@ Intel(R) Extension for Scikit-learn is also a part of `Intel(R) AI Tools `_. +These performance charts use benchmarks that you can find in the `scikit-learn bench repository `_. -Supported Algorithms +Supported Algorithms --------------------- -See all of the :ref:`sklearn_algorithms`. +See all of the :ref:`sklearn_algorithms`. Intel(R) Optimizations @@ -96,7 +94,7 @@ Enable Intel(R) GPU optimizations quick-start.rst samples.rst kaggle.rst - + .. 
toctree:: :caption: Developer Guide :hidden: @@ -111,7 +109,7 @@ Enable Intel(R) GPU optimizations verbose.rst preview.rst deprecation.rst - + .. toctree:: :caption: Performance @@ -121,7 +119,7 @@ Enable Intel(R) GPU optimizations guide/acceleration.rst .. toctree:: - :caption: Learn + :caption: Learn :hidden: :maxdepth: 2 @@ -132,7 +130,7 @@ Enable Intel(R) GPU optimizations :caption: More :hidden: :maxdepth: 2 - + Support contribute.rst license.rst diff --git a/doc/sources/input-types.rst b/doc/sources/input-types.rst index cd1714feb8..a0781119c9 100644 --- a/doc/sources/input-types.rst +++ b/doc/sources/input-types.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2024 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2024 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. 
See the License for the specific language governing permissions and +.. limitations under the License. .. _input_types: diff --git a/doc/sources/kaggle.rst b/doc/sources/kaggle.rst index 364ab723d1..6f704289bc 100644 --- a/doc/sources/kaggle.rst +++ b/doc/sources/kaggle.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _kaggle: diff --git a/doc/sources/kaggle/automl.rst b/doc/sources/kaggle/automl.rst index 0156e5ec89..2663010481 100644 --- a/doc/sources/kaggle/automl.rst +++ b/doc/sources/kaggle/automl.rst @@ -1,20 +1,18 @@ -.. ****************************************************************************** -.. 
* Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ - -.. |automl_with_intelex_jun| replace:: AutoML MultiClass Classification (Gradient Boosting, Random Forest, kNN) using AutoGluon with |intelex| +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. + +.. |automl_with_intelex_jun| replace:: AutoML MultiClass Classification (Gradient Boosting, Random Forest, kNN) using AutoGluon with |intelex| .. _automl_with_intelex_jun: https://www.kaggle.com/alex97andreev/tps-jun-autogluon-with-sklearnex .. 
|automl_with_intelex_tps_oct| replace:: AutoML Binary Classification (Gradient Boosting, Random Forest) using AutoGluon with |intelex| diff --git a/doc/sources/kaggle/classification.rst b/doc/sources/kaggle/classification.rst index c0176cab62..a017af2bd5 100644 --- a/doc/sources/kaggle/classification.rst +++ b/doc/sources/kaggle/classification.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. 
Kaggle Kernels for Classification Tasks *************************************** @@ -35,11 +33,11 @@ Binary Classification - Content * - `Logistic Regression for Binary Classification `_ - + **Data:** [TPS Nov 2021] Synthetic spam emails data - Identify spam emails via features extracted from the email - - + - - data preprocessing (normalization) - search for optimal parameters using Optuna @@ -47,23 +45,23 @@ Binary Classification - performance comparison to scikit-learn * - `Feature Importance in Random Forest for Binary Classification `_ - + **Data:** [TPS Nov 2021] Synthetic spam emails data - Identify spam emails via features extracted from the email - - - + - + - reducing DataFrame memory usage - computing feature importance with ELI5 and the default scikit-learn permutation importance - training using scikit-learn-intelex - performance comparison to scikit-learn * - `Random Forest for Binary Classification `_ - + **Data:** [TPS Apr 2021] Synthetic data based on Titanic dataset - Predict whether a passenger survivies - - - + - + - data preprocessing - feature construction - search for optimal parameters using Optuna @@ -71,21 +69,21 @@ Binary Classification - performance comparison to scikit-learn * - `Support Vector Classification (SVC) for Binary Classification `_ - + **Data:** [TPS Apr 2021] Synthetic data based on Titanic dataset - Predict whether a passenger survivies - - - + - + - data preprocessing - training and prediction using scikit-learn-intelex - performance comparison to scikit-learn * - `Support Vector Classification (SVC) with Feature Preprocessing for Binary Classification `_ - + **Data:** [TPS Apr 2021] Synthetic data based on Titanic dataset - Predict whether a passenger survivies - - - + - + - data preprocessing - feature engineering - training and prediction using scikit-learn-intelex @@ -104,10 +102,10 @@ MultiClass Classification - Content * - `Logistic Regression for MultiClass Classification with Quantile Transformer `_ - + **Data:** 
[TPS Jun 2021] Synthetic eCommerce data - Predict the category of an eCommerce product - - + - - data preprocessing with Quantile Transformer - training and prediction using scikit-learn-intelex @@ -115,19 +113,19 @@ MultiClass Classification - performance comparison to scikit-learn * - `Support Vector Classification (SVC) for MultiClass Classification (example 1) `_ - + **Data:** [TPS May 2021] Synthetic eCommerce data - Predict the category of an eCommerce product - - + - - data preprocessing - training and prediction using scikit-learn-intelex * - `Stacking Classifer with Logistic Regression, kNN, Random Forest, and Quantile Transformer `_ - + **Data:** [TPS Jun 2021] Synthetic eCommerce data - Predict the category of an eCommerce product - - + - - data preprocessing: one-hot encoding, dimensionality reduction with PCA, normalization - creating a stacking classifier with logistic regression, kNN, and random forest, @@ -137,31 +135,31 @@ MultiClass Classification - performance comparison to scikit-learn * - `Support Vector Classification (SVC) for MultiClass Classification (example 2) `_ - + **Data:** [TPS Dec 2021] Synthetic Forest Cover Type data - Predict the forest cover type - - + - - data preprocessing - training and prediction using scikit-learn-intelex - performance comparison to scikit-learn * - `Feature Importance in Random Forest for MultiClass Classification `_ - + **Data:** [TPS Dec 2021] Synthetic Forest Cover Type data - Predict the forest cover type - - - + - + - reducing DataFrame memory usage - computing feature importance with ELI5 - training and prediction using scikit-learn-intelex - performance comparison to scikit-learn * - `k-Nearest Neighbors (kNN) for MultiClass Classification `_ - + **Data:** [TPS Feb 2022] Bacteria DNA - Predict bacteria species based on repeated lossy measurements of DNA snippets - - + - - data preprocessing - training and prediction using scikit-learn-intelex - performance comparison to scikit-learn @@ -179,21 
+177,21 @@ Classification Tasks in Computer Vision - Content * - `Support Vector Classification (SVC) for MultiClass Classification (CV task) `_ - + **Data:** Digit Recognizer (MNIST) - Recognize hand-written digits - - - + - + - data preprocessing - training and prediction using scikit-learn-intelex - performance comparison to scikit-learn * - `k-Nearest Neighbors (kNN) for MultiClass Classification (CV task) `_ - + **Data:** Digit Recognizer (MNIST) - Recognize hand-written digits - - - + - + - training and prediction using scikit-learn-intelex - performance comparison to scikit-learn @@ -210,11 +208,11 @@ Classification Tasks in Natural Language Processing - Content * - `Support Vector Classification (SVC) for a Binary Classification (NLP task) `_ - + **Data:** Natural Language Processing with Disaster Tweets - Predict which tweets are about real disasters and which ones are not - - - + - + - data preprocessing - TF-IDF calculation - search for optimal paramters using Optuna @@ -222,21 +220,21 @@ Classification Tasks in Natural Language Processing - performance comparison to scikit-learn * - `One-vs-Rest Support Vector Machine (SVM) with Text Data for MultiClass Classification `_ - + **Data:** What's Cooking - Use recipe ingredients to predict the cuisine - - - + - + - feature extraction using TfidfVectorizer - training and prediction using scikit-learn-intelex - performance comparison to scikit-learn * - `Support Vector Classification (SVC) for Binary Classification with Sparse Data (NLP task) `_ - + **Data:** Stack Overflow questions - Predict the binary quality rating for Stack Overflow questions - - - + - + - data preprocessing - TF-IDF calculation - search for optimal paramters using Optuna diff --git a/doc/sources/kaggle/note-about-tps.rst b/doc/sources/kaggle/note-about-tps.rst index 692c6a3d0e..2e53d54afe 100644 --- a/doc/sources/kaggle/note-about-tps.rst +++ b/doc/sources/kaggle/note-about-tps.rst @@ -1,17 +1,15 @@ -.. 
****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. TPS stands for Tabular Playground Series, which is a series of beginner-friendly Kaggle competitions. diff --git a/doc/sources/kaggle/regression.rst b/doc/sources/kaggle/regression.rst index abdbebc15b..3bd555c1c4 100644 --- a/doc/sources/kaggle/regression.rst +++ b/doc/sources/kaggle/regression.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. 
* You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. 
Kaggle Kernels for Regression Tasks ************************************ @@ -35,17 +33,17 @@ Using a Single Regressor - Content * - `Baseline Nu Support Vector Regression (nuSVR) with RBF Kernel `_ - + **Data:** [TPS Jul 2021] Synthetic pollution data - Predict air pollution measurements over time based on weather and input values from multiple sensors - - + - - data preprocessing - search for optimal parameters using Optuna - training and prediction using scikit-learn-intelex * - `Nu Support Vector Regression (nuSVR) `__ - + **Data:** [TPS Aug 2021] Synthetic loan data - Calculate loss associated with loan defaults - @@ -57,7 +55,7 @@ Using a Single Regressor * - `Nu Support Vector Regression (nuSVR) `__ - + **Data:** House Prices dataset - Predict sale prices for a property based on its characteristics - @@ -71,10 +69,10 @@ Using a Single Regressor - performance comparison to scikit-learn * - `Random Forest Regression `_ - + **Data:** [TPS Jul 2021] Synthetic pollution data - Predict air pollution measurements over time based on weather and input values from multiple sensors - - + - - checking correlation between features - search for best parameters using GridSearchCV @@ -83,10 +81,10 @@ Using a Single Regressor * - `Random Forest Regression with Feature Engineering `_ - + **Data:** [TPS Jul 2021] Synthetic pollution data - Predict air pollution measurements over time based on weather and input values from multiple sensors - - + - - data preprocessing - feature engineering @@ -106,8 +104,8 @@ Using a Single Regressor - performance comparison to scikit-learn * - `Ridge Regression `_ - - + + **Data:** [TPS Sep 2021] Synthetic insurance data - Predict the probability of a customer making a claim upon an insurance policy - @@ -131,10 +129,10 @@ Stacking Regressors - Content * - `Stacking Regressor with Random Forest, SVR, and LASSO `_ - + **Data:** [TPS Jul 2021] Synthetic pollution data - Predict air pollution measurements over time based on weather and input values
from multiple sensors - - + - - feature engineering - creating a stacking regressor @@ -144,7 +142,7 @@ Stacking Regressors * - `Stacking Regressor with ElasticNet, LASSO, and Ridge Regression for Time-series data `_ - + **Data:** Predict Future Sales dataset - Predict total sales for every product and store in the next month based on daily sales data - diff --git a/doc/sources/license.rst b/doc/sources/license.rst index 2fa5b9dba8..3a65017eda 100644 --- a/doc/sources/license.rst +++ b/doc/sources/license.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2023 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2023 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. 
_license: diff --git a/doc/sources/non-scikit-algorithms.rst b/doc/sources/non-scikit-algorithms.rst index 620461843f..3e0c33e810 100644 --- a/doc/sources/non-scikit-algorithms.rst +++ b/doc/sources/non-scikit-algorithms.rst @@ -1,22 +1,20 @@ -.. ****************************************************************************** -.. * Copyright 2024 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2024 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. Non-Scikit-Learn Algorithms =========================== -Algorithms not presented in the original scikit-learn are described here. All algorithms are +Algorithms not presented in the original scikit-learn are described here. 
All algorithms are available for both CPU and GPU (including distributed mode) BasicStatistics diff --git a/doc/sources/oneapi-gpu.rst b/doc/sources/oneapi-gpu.rst index 37bfbf3a92..f9808f97e4 100644 --- a/doc/sources/oneapi-gpu.rst +++ b/doc/sources/oneapi-gpu.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _oneapi_gpu: @@ -64,16 +62,16 @@ specific device with the help of dpctl: :code:`usm_ndarray`. The algorithms from the stock version of scikit-learn do not support this feature. - Use global configurations of |intelex|\*: - + 1. 
The :code:`target_offload` option can be used to set the device primarily used to perform computations. Accepted data types are :code:`str` and :code:`dpctl.SyclQueue`. If you pass a string to :code:`target_offload`, it should either be ``"auto"``, which means that the execution context is deduced from the location of input data, or a string with SYCL* filter selector. The default value is ``"auto"``. - + 2. The :code:`allow_fallback_to_host` option - is a Boolean flag. If set to :code:`True`, the computation is allowed + is a Boolean flag. If set to :code:`True`, the computation is allowed to fallback to the host device when a particular estimator does not support the selected device. The default value is :code:`False`. diff --git a/doc/sources/patching/patch-kmeans-example.rst b/doc/sources/patching/patch-kmeans-example.rst index c6324bbb11..555e479faa 100644 --- a/doc/sources/patching/patch-kmeans-example.rst +++ b/doc/sources/patching/patch-kmeans-example.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. 
You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. :: diff --git a/doc/sources/patching/patch-one-algorithm.rst b/doc/sources/patching/patch-one-algorithm.rst index 2191b79cb4..7216e1a417 100644 --- a/doc/sources/patching/patch-one-algorithm.rst +++ b/doc/sources/patching/patch-one-algorithm.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. 
See the License for the specific language governing permissions and +.. limitations under the License. :: diff --git a/doc/sources/patching/patch-several-algorithms.rst b/doc/sources/patching/patch-several-algorithms.rst index 2709497127..32527dc5aa 100644 --- a/doc/sources/patching/patch-several-algorithms.rst +++ b/doc/sources/patching/patch-several-algorithms.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. 
:: diff --git a/doc/sources/patching/patching-options.rst b/doc/sources/patching/patching-options.rst index 5705a43ca0..99aa270ad7 100644 --- a/doc/sources/patching/patching-options.rst +++ b/doc/sources/patching/patching-options.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. important:: diff --git a/doc/sources/patching/undo-patch.rst b/doc/sources/patching/undo-patch.rst index 5692a76c7b..db356d84a5 100644 --- a/doc/sources/patching/undo-patch.rst +++ b/doc/sources/patching/undo-patch.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. 
* Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. :: diff --git a/doc/sources/preview.rst b/doc/sources/preview.rst index 7f19c6a264..6bf148ea95 100644 --- a/doc/sources/preview.rst +++ b/doc/sources/preview.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2024 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. 
* distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2024 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _preview: diff --git a/doc/sources/quick-start.rst b/doc/sources/quick-start.rst index deef962868..e9ed7ea184 100644 --- a/doc/sources/quick-start.rst +++ b/doc/sources/quick-start.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. 
Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. |intelex_repo| replace:: |intelex| repository .. _intelex_repo: https://github.com/uxlfoundation/scikit-learn-intelex @@ -21,7 +19,7 @@ Quick Start #################### -Get ready to elevate your scikit-learn code with |intelex| and experience the benefits of accelerated performance in just a few simple steps. +Get ready to elevate your scikit-learn code with |intelex| and experience the benefits of accelerated performance in just a few simple steps. Compatibility with Scikit-learn* --------------------------------- @@ -31,15 +29,15 @@ Intel(R) Extension for Scikit-learn is compatible with the last four versions of Integrate |intelex| -------------------- -Patching +Patching ********************** -Once you install Intel*(R) Extension for Scikit-learn*, you replace algorithms that exist in the scikit-learn package with their optimized versions from the extension. +Once you install Intel*(R) Extension for Scikit-learn*, you replace algorithms that exist in the scikit-learn package with their optimized versions from the extension. This action is called ``patching``. This is not a permanent change so you can always undo the patching if necessary. -To patch Intel® Extension for Scikit-learn, use one of these methods: +To patch Intel® Extension for Scikit-learn, use one of these methods: -.. list-table:: +.. 
list-table:: :header-rows: 1 :align: left @@ -48,16 +46,16 @@ To patch Intel® Extension for Scikit-learn, use one of these methods: * - Use a flag in the command line - Run this command: - :: - + :: + python -m sklearnex my_application.py - * - Modify your script + * - Modify your script - Add the following lines: :: - + from sklearnex import patch_sklearn - patch_sklearn() + patch_sklearn() * - Import an estimator from the ``sklearnex`` module - Run this command: @@ -70,21 +68,21 @@ To patch Intel® Extension for Scikit-learn, use one of these methods: These patching methods are interchangeable. They support different enabling scenarios while producing the same result. - + **Example** This example shows how to patch Intel(R) extension for Scikit-Learn by modifying your script. To make sure that patching is registered by the scikit-learn estimators, always import scikit-learn after these lines. - + .. code-block:: python :caption: Example: Drop-In Patching - + import numpy as np from sklearnex import patch_sklearn patch_sklearn() # You need to re-import scikit-learn algorithms after the patch from sklearn.cluster import KMeans - + # The use of the original Scikit-learn is not changed X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]]) @@ -97,11 +95,11 @@ Global Patching You can also use global patching to patch all your scikit-learn applications without any additional actions. -Before you begin, make sure that you have read and write permissions for Scikit-learn files. +Before you begin, make sure that you have read and write permissions for Scikit-learn files. With global patching, you can: -.. list-table:: +.. list-table:: :header-rows: 1 :align: left @@ -111,19 +109,19 @@ With global patching, you can: * - Patch all supported algorithms - Run this command: - :: - + :: + python -m sklearnex.glob patch_sklearn - + - If you run the global patching command several times with different parameters, then only the last configuration is applied.
* - Patch selected algorithms - Use ``--algorithm`` or ``-a`` keys with a list of algorithms to patch. For example, to patch only ``SVC`` and ``RandomForestClassifier`` estimators, run :: - + python -m sklearnex.glob patch_sklearn -a svc random_forest_classifier - - - + + - * - Enable global patching via code - Use the ``patch_sklearn`` function with the ``global_patch`` argument: @@ -132,7 +130,7 @@ With global patching, you can: from sklearnex import patch_sklearn patch_sklearn(global_patch=True) import sklearn - + - After that, Scikit-learn patching is enabled in the current application and in all others that use the same environment. * - Disable patching notifications - Use ``--no-verbose`` or ``-nv`` keys: @@ -140,7 +138,7 @@ With global patching, you can: :: python -m sklearnex.glob patch_sklearn -a svc random_forest_classifier -nv - - + - * - Disable global patching - Run this command: @@ -156,7 +154,7 @@ With global patching, you can: from sklearnex import unpatch_sklearn unpatch_sklearn(global_patch=True) - - + .. tip:: If you clone an environment with enabled global patching, it will already be applied in the new environment. Unpatching @@ -169,20 +167,20 @@ To unpatch successfully, you must reimport the scikit-learn package:: sklearnex.unpatch_sklearn() # Re-import scikit-learn algorithms after the unpatch - from sklearn.cluster import KMeans + from sklearn.cluster import KMeans -Installation +Installation -------------------- .. contents:: :local: -.. tip:: To prevent version conflicts, we recommend creating and activating a new environment for |intelex|. +.. tip:: To prevent version conflicts, we recommend creating and activating a new environment for |intelex|. -Install from PyPI +Install from PyPI ********************** -Recommended by default.
- + Recommended by default. + To install, run:: conda install scikit-learn-intelex -c conda-forge - + .. list-table:: **Supported Configurations** :header-rows: 1 :align: left @@ -252,12 +250,12 @@ To prevent version conflicts, we recommend installing `scikit-learn-intelex` int .. tab:: Intel channel - Recommended for the Intel® Distribution for Python users. + Recommended for the Intel® Distribution for Python users. To install, run:: conda install scikit-learn-intelex -c https://software.repos.intel.com/python/conda/ - + .. list-table:: **Supported Configurations** :header-rows: 1 :align: left @@ -277,7 +275,7 @@ To prevent version conflicts, we recommend installing `scikit-learn-intelex` int - [CPU, GPU] - [CPU, GPU] - [CPU, GPU] - + .. tab:: Main channel @@ -285,7 +283,7 @@ To prevent version conflicts, we recommend installing `scikit-learn-intelex` int To install, run:: conda install scikit-learn-intelex - + .. list-table:: **Supported Configurations** :header-rows: 1 :align: left @@ -321,9 +319,13 @@ Download the Intel AI Tools `here `_ for each version of Intel® Extension for Scikit-learn*. +======= +See the `Release Notes `_ for each version of Intel® Extension for Scikit-learn*. +>>>>>>> e8a9b150 (CI: add `skywalking-eyes` license header check) -System Requirements +System Requirements -------------------- Hardware Requirements @@ -339,7 +341,7 @@ Hardware Requirements - SSE4.2 - AVX2 - AVX512 - + .. note:: ARM* architecture is not supported. .. tab:: GPU @@ -367,21 +369,21 @@ Software Requirements - Linux* OS: Ubuntu* 18.04 or newer - Windows* OS 10 or newer - Windows* Server 2019 or newer - + .. important:: - + If you use accelerators, refer to `oneAPI DPC++/C++ Compiler System Requirements `_. Intel(R) Extension for Scikit-learn is compatible with the last four versions of scikit-learn: * 1.0.X * 1.1.X -* 1.2.X +* 1.2.X * 1.3.X Memory Requirements ********************** -By default, algorithms in |intelex| run in the multi-thread mode. 
This mode uses all available threads. +By default, algorithms in |intelex| run in the multi-thread mode. This mode uses all available threads. Optimized scikit-learn algorithms can consume more RAM than their corresponding unoptimized versions. .. list-table:: @@ -395,7 +397,7 @@ Optimized scikit-learn algorithms can consume more RAM than their corresponding - Both Scikit-learn and |intelex| consume approximately the same amount of RAM. - In |intelex|, an algorithm with ``N`` threads consumes ``N`` times more RAM. -In all |intelex| algorithms with GPU support, computations run on device memory. +In all |intelex| algorithms with GPU support, computations run on device memory. The device memory must be large enough to store a copy of the entire dataset. You may also require additional device memory for internal arrays that are used in computation. diff --git a/doc/sources/samples.rst b/doc/sources/samples.rst index 00071f0fad..82300e1b12 100644 --- a/doc/sources/samples.rst +++ b/doc/sources/samples.rst @@ -1,23 +1,21 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. 
you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. .. _samples: ####### -Samples +Samples ####### The following samples are also provided as Jupyter notebooks in |intelex| repository. diff --git a/doc/sources/support.rst b/doc/sources/support.rst index 9548ec2ab7..b219347982 100644 --- a/doc/sources/support.rst +++ b/doc/sources/support.rst @@ -1,33 +1,31 @@ -.. ****************************************************************************** -.. * Copyright 2021 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2021 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. 
distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. ###################################################### Intel(R) Extension for Scikit-learn Support ###################################################### -We are committed to providing support and assistance to help you make the most out of Intel(R) Extension for Scikit-learn. +We are committed to providing support and assistance to help you make the most out of Intel(R) Extension for Scikit-learn. -Use the following methods if you face any challenges. +Use the following methods if you face any challenges. Issues ---------------------------------- -If you have a problem, check out the `GitHub Issues `_ to see if the issue you want to address is already reported. +If you have a problem, check out the `GitHub Issues `_ to see if the issue you want to address is already reported. You may find users that have encountered the same bug or have similar ideas for changes or updates. diff --git a/doc/sources/tutorials.rst b/doc/sources/tutorials.rst index ce778bb5ab..6a43eaf4bf 100644 --- a/doc/sources/tutorials.rst +++ b/doc/sources/tutorials.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2024 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. 
* See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2024 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. ################################################ @@ -35,20 +33,20 @@ Tutorials .. grid-item-card:: Advanced scikit-learn* Essentials for Machine Learning :link: https://www.intel.com/content/www/us/en/developer/videos/advanced-scikit-learn-essentials-for-ml.html :padding: 1 - - Special technique to perform scikit-learn computation on Intel GPUs. - + + Special technique to perform scikit-learn computation on Intel GPUs. + .. grid-item-card:: Develop Efficient AI Solutions with Accelerated Machine Learning :link: https://www.intel.com/content/www/us/en/developer/videos/accelerated-machine-learning-for-ai-solutions.html :padding: 1 - Techniques for maximizing Intel® Extension for Scikit-learn*. + Techniques for maximizing Intel® Extension for Scikit-learn*. .. grid-item-card:: Getting started with classical Machine Learning Frameworks using Google Colaboratory :link: https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Getting-started-with-classical-Machine-Learning-Frameworks-using/post/1450139 :padding: 1 - Simple Installation of Intel® Extension for Scikit-learn* on Google Colaboratory. 
+ Simple Installation of Intel® Extension for Scikit-learn* on Google Colaboratory. .. grid-item-card:: Accelerate Machine Learning Workloads: K-means and GPairs Algorithms :link: https://www.intel.com/content/www/us/en/developer/videos/accelerate-ml-workloads-k-means-gpairs-algorithms.html @@ -68,7 +66,7 @@ Tutorials An overview of scikit-learn essentials. -Case Studies +Case Studies ---------------------------------- .. grid:: 3 diff --git a/doc/sources/verbose.rst b/doc/sources/verbose.rst index cf77cc0669..acbfcd35c3 100755 --- a/doc/sources/verbose.rst +++ b/doc/sources/verbose.rst @@ -1,18 +1,16 @@ -.. ****************************************************************************** -.. * Copyright 2020 Intel Corporation -.. * -.. * Licensed under the Apache License, Version 2.0 (the "License"); -.. * you may not use this file except in compliance with the License. -.. * You may obtain a copy of the License at -.. * -.. * http://www.apache.org/licenses/LICENSE-2.0 -.. * -.. * Unless required by applicable law or agreed to in writing, software -.. * distributed under the License is distributed on an "AS IS" BASIS, -.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. * See the License for the specific language governing permissions and -.. * limitations under the License. -.. *******************************************************************************/ +.. Copyright 2020 Intel Corporation +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. 
limitations under the License. .. _verbose: diff --git a/examples/notebooks/README.md b/examples/notebooks/README.md index b19a2a3b08..bad51687ba 100644 --- a/examples/notebooks/README.md +++ b/examples/notebooks/README.md @@ -1,21 +1,37 @@ + + # :snake: Intel(R) Extension for Scikit-learn* notebooks -This folder contains examples of python notebooks that use Intel(R) extension for Scikit-learn for popular datasets. +This folder contains examples of python notebooks that use Intel(R) extension for Scikit-learn for popular datasets. #### :rocket: Jupyter startup guide You can use python notebooks with the help of Jupyter* notebook to run the following files: ```bash conda install -c conda-forge notebook scikit-learn-intelex -``` -or +``` +or ```bash pip install notebook scikit-learn-intelex -``` +``` Run Jupyter after installation: ```bash jupyter notebook --notebook-dir=./ --ip=* --no-browser -``` +``` #### :pencil: Table of contents From 963298b3a96dccc18edea8bf462dbfde439be284 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 11 Dec 2024 09:01:59 +0100 Subject: [PATCH 4/7] MAINT: refactor some sklearnex examples (#1948) * MAINT: refactor some sklearnex examples * fixed random_forest_regressor_dpnp.py --- examples/sklearnex/incremental_pca_dpctl.py | 19 ++++++++++++-- .../sklearnex/random_forest_regressor_dpnp.py | 26 ++++++++++++++----- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/examples/sklearnex/incremental_pca_dpctl.py b/examples/sklearnex/incremental_pca_dpctl.py index 0e176b139a..efec3ed5cf 100644 --- a/examples/sklearnex/incremental_pca_dpctl.py +++ b/examples/sklearnex/incremental_pca_dpctl.py @@ -13,14 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== + +# sklearnex IncrementalPCA example for GPU offloading with DPCtl usm ndarray: +# SKLEARNEX_PREVIEW=YES python ./incremental_pca_dpctl.py + import dpctl import dpctl.tensor as dpt -from sklearnex.preview.decomposition import IncrementalPCA +# Import estimator via sklearnex's patch mechanism from sklearn +from sklearnex import patch_sklearn, sklearn_is_patched + +patch_sklearn() + +# Function that can validate current state of patching +sklearn_is_patched() + +# Import estimator from the patched sklearn namespace. +from sklearn.decomposition import IncrementalPCA + +# Or just directly import estimator from sklearnex namespace. +# from sklearnex.preview.decomposition import IncrementalPCA # We create GPU SyclQueue and then put data to dpctl tensor using # the queue. It allows us to do computation on GPU. - queue = dpctl.SyclQueue("gpu") incpca = IncrementalPCA() diff --git a/examples/sklearnex/random_forest_regressor_dpnp.py b/examples/sklearnex/random_forest_regressor_dpnp.py index 5b8b6bc616..da87a10b4b 100644 --- a/examples/sklearnex/random_forest_regressor_dpnp.py +++ b/examples/sklearnex/random_forest_regressor_dpnp.py @@ -15,16 +15,30 @@ # ============================================================================== # sklearnex RF example for GPU offloading with DPNP ndarray: -# python ./random_forest_regressor_dpnp_batch.py +# python ./random_forest_regressor_dpnp.py +import dpctl import dpnp -import numpy as np from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split +# Import estimator via sklearnex's patch mechanism from sklearn +from sklearnex import patch_sklearn, sklearn_is_patched + +patch_sklearn() + +# Function that can validate current state of patching +sklearn_is_patched() + +# Import estimator from the patched sklearn namespace. 
+from sklearn.ensemble import RandomForestRegressor + +# Or just directly import estimator from sklearnex namespace. from sklearnex.ensemble import RandomForestRegressor -sycl_device = "gpu:0" +# We create GPU SyclQueue and then put data to dpnp ndarrays using +# the queue. It allows us to do computation on GPU. +queue = dpctl.SyclQueue("gpu") X, y = make_regression( n_samples=1000, n_features=4, n_informative=2, random_state=0, shuffle=False @@ -32,9 +46,9 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) -dpnp_X_train = dpnp.asarray(X_train, device=sycl_device) -dpnp_y_train = dpnp.asarray(y_train, device=sycl_device) -dpnp_X_test = dpnp.asarray(X_test, device=sycl_device) +dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=queue) +dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=queue) +dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=queue) rf = RandomForestRegressor(max_depth=2, random_state=0).fit(dpnp_X_train, dpnp_y_train) From 83bd03ae1e0729250df14851fcb4bdd63fc6372d Mon Sep 17 00:00:00 2001 From: david-cortes-intel Date: Thu, 12 Dec 2024 11:14:33 +0100 Subject: [PATCH 5/7] ignore config files from file explorers (#2224) --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index d1a918b47f..204eac7d3f 100755 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,7 @@ tests/_results* # json reports from pytest .pytest_reports/* + +# Configurations from file explorers +.directory +.DS_Store From 01acae25eeae85111ff02393ae60dd27a5dbe00b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 12 Dec 2024 11:42:46 +0100 Subject: [PATCH 6/7] Remove license header from PR template (#2223) * Update .licenserc.yaml * Update Pull_Request_template.md --- .github/.licenserc.yaml | 1 + .github/Pull_Request_template.md | 16 ---------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/.github/.licenserc.yaml b/.github/.licenserc.yaml 
index 20ad7f595b..9eeafcbc21 100644 --- a/.github/.licenserc.yaml +++ b/.github/.licenserc.yaml @@ -58,6 +58,7 @@ header: - 'requirements*.txt' # Some files from .ci/.github - '.github/CODEOWNERS' + - '.github/Pull_Request_template.md' - '.github/renovate.json' # Specific files - 'setup.cfg' diff --git a/.github/Pull_Request_template.md b/.github/Pull_Request_template.md index c546e5e0d6..c79b8b8fa0 100644 --- a/.github/Pull_Request_template.md +++ b/.github/Pull_Request_template.md @@ -1,19 +1,3 @@ - - ## Description _Add a comprehensive description of proposed changes_ From 77430600451e21dd155da7396558058847de7e60 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 12 Dec 2024 14:45:52 +0100 Subject: [PATCH 7/7] [CI, Enhancement] Add Codecov support (#2222) * Update ci.yml * Update ci.yml * Create codecov.yml * Update codecov.yml * Update codecov.yml * Update codecov.yml * Update codecov.yml * Update ci.yml * Update codecov.yml * Update codecov.yml * Update codecov.yml * Update codecov.yml * Update codecov.yml --- .github/workflows/ci.yml | 25 +++++++++------ .github/workflows/codecov.yml | 58 +++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/codecov.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f19a48193..9450f922ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -139,14 +139,16 @@ jobs: export COVERAGE_FILE=$(pwd)/.coverage.sklearn if [ "${{ steps.set-env.outputs.DPCFLAG }}" == "" ]; then export CPU=cpu; fi bash .ci/scripts/run_sklearn_tests.sh $CPU - - name: Archive coverage data + - name: Create coverage report + run: | + source venv/bin/activate + coverage combine .coverage.sklearnex .coverage.sklearn + coverage json -o coverage.lnx${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json + - name: Archive coverage report uses: actions/upload-artifact@v4 with: name: coverage_lnx_Py${{ matrix.PYTHON_VERSION }}_${{ 
matrix.SKLEARN_VERSION }} - path: | - .coverage.sklearn - .coverage.sklearnex - include-hidden-files: true + path: coverage.lnx${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json - name: Sklearn testing [preview] run: | source venv/bin/activate @@ -282,14 +284,17 @@ jobs: set COVERAGE_FILE=%cd%\.coverage.sklearn if "${{ steps.set-env.outputs.DPCFLAG }}"=="" set CPU=cpu bash .ci/scripts/run_sklearn_tests.sh %CPU% - - name: Archive coverage data + - name: Create coverage report + shell: cmd + run: | + call .\venv\Scripts\activate.bat + coverage combine .coverage.sklearnex .coverage.sklearn + coverage json -o coverage.win${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json + - name: Archive coverage report uses: actions/upload-artifact@v4 with: name: coverage_win_Py${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }} - path: | - .coverage.sklearn - .coverage.sklearnex - include-hidden-files: true + path: coverage.win${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json - name: Sklearn testing [preview] shell: cmd run: | diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml new file mode 100644 index 0000000000..d50d4955a2 --- /dev/null +++ b/.github/workflows/codecov.yml @@ -0,0 +1,58 @@ +#=============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#=============================================================================== + +name: codecov +on: + workflow_run: + workflows: [CI] + types: + - completed + +permissions: read-all + +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref_name }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + upload_coverage: + name: Codecov + runs-on: ubuntu-24.04 + if: ${{ github.repository == 'uxlfoundation/scikit-learn-intelex' && github.event.workflow_run.conclusion == 'success' }} + timeout-minutes: 5 + + steps: + - name: Checkout Scikit-learn-intelex + uses: actions/checkout@v4 + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + path: coverage + github-token: ${{ github.token }} + run-id: ${{ github.event.workflow_run.id }} + merge-multiple: true + - name: Install codecov cli + run: | + curl -Os https://cli.codecov.io/latest/linux/codecov + chmod +x codecov + - name: Upload to codecov + run: | + export PR=${{ github.event.workflow_run.pull_requests[0].number }} + export SHA=${{ github.event.workflow_run.head_sha }} + export VARARGS="-n github" + # if a PR, pass proper information to codecov about SHA and PR, otherwise use main branch info + if [ -n "${PR}" ]; then export VARARGS="${VARARGS}-${PR}-${SHA} -P ${PR} -C ${SHA}"; fi + ./codecov -v upload-process -Z -t ${{ secrets.CODECOV_TOKEN }} $VARARGS -F github -s ./coverage