Skip to content

Commit

Permalink
[fix] changes for GitHub actions PR CI for matching oneDAL Nightly-bu…
Browse files Browse the repository at this point in the history
…ild (#2076)

* Update ci.yml

* Update activate_components.bat

* Update test_linear.py

* Update test_incremental_linear.py

* Update test_kmeans.py

* Update deselected_tests.yaml

* Update deselected_tests.yaml

* add deselction mechanism for Non-Intel Hardware

* remove warnings

* address codefactor recommendations

* make explicit

* mistake in deselection process

* remove bad code

* remove bad code

* isort fixes

* forgotten change to incremental_linear

* add more deselections

* match #2081

* fix errors in formatting

* correct english

* second english correction

* remove some deselections

* set 2025.2 fail for recheck
  • Loading branch information
icfaust authored Oct 4, 2024
1 parent 2fccf44 commit a2b9bf3
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 21 deletions.
4 changes: 2 additions & 2 deletions .github/scripts/activate_components.bat
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ rem %3 - dpcpp activate flag
rem prepare vc
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64
rem prepare icx only if no parameter is given.
if "%3"=="" call .\dpcpp\compiler\%1\env\vars.bat
if "%3"=="" call .\oneapi\compiler\%1\env\vars.bat
rem prepare tbb
call .\dpcpp\tbb\%2\env\vars.bat
call .\oneapi\tbb\%2\env\vars.bat
rem prepare oneDAL
call .\__release_win\daal\latest\env\vars.bat
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ jobs:
shell: cmd
run: |
call .\venv\Scripts\activate.bat
call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
bash .ci/scripts/describe_system.sh
- name: Build daal4py/sklearnex
shell: cmd
Expand Down
2 changes: 1 addition & 1 deletion sklearnex/cluster/k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
patching_status = PatchingConditionsChain(f"sklearn.cluster.{class_name}.fit")

sample_count = _num_samples(X)
self._algorithm = self.algorithm
supported_algs = ["auto", "full", "lloyd", "elkan"]

if self.algorithm == "elkan":
logging.getLogger("sklearnex").info(
"oneDAL does not support 'elkan', using 'lloyd' algorithm instead."
Expand Down
28 changes: 17 additions & 11 deletions sklearnex/cluster/tests/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
get_queues,
)
from sklearnex import config_context
from sklearnex.tests.utils import _IS_INTEL


def generate_dense_dataset(n_samples, n_features, density, n_clusters):
Expand All @@ -45,11 +46,11 @@ def generate_dense_dataset(n_samples, n_features, density, n_clusters):


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
if not sklearn_check_version("1.1") and algorithm == "lloyd":
pytest.skip("lloyd requires sklearn>=1.1.")
from sklearnex.cluster import KMeans

X_dense = generate_dense_dataset(1000, 10, 0.5, 3)
Expand All @@ -70,7 +71,9 @@ def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
reason="Sparse data requires oneDAL>=2024.7.0",
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
from sklearnex.cluster import KMeans
Expand All @@ -86,11 +89,10 @@ def test_sklearnex_import_for_sparse_data(queue, algorithm, init):


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
def test_results_on_dense_gold_data(dataframe, queue, algorithm):
if not sklearn_check_version("1.1") and algorithm == "lloyd":
pytest.skip("lloyd requires sklearn>=1.1.")

from sklearnex.cluster import KMeans

X_train = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
Expand Down Expand Up @@ -121,15 +123,19 @@ def test_results_on_dense_gold_data(dataframe, queue, algorithm):
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("init", ["k-means++", "random", "arraylike"])
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
@pytest.mark.parametrize(
"dims", [(1000, 10, 0.95, 3), (50000, 100, 0.75, 10), (10000, 10, 0.8, 5)]
)
def test_dense_vs_sparse(queue, init, algorithm, dims):
from sklearnex.cluster import KMeans

if init == "random":
pytest.skip("Random initialization in sparse K-means is buggy.")
if init == "random" or (not _IS_INTEL and init == "k-means++"):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of k-means++ in 2025.2 oneDAL")
pytest.skip(f"{init} initialization for sparse K-means is non-conformant.")

# For higher level of sparsity (smaller density) the test may fail
n_samples, n_features, density, n_clusters = dims
Expand Down
6 changes: 5 additions & 1 deletion sklearnex/linear_model/tests/test_incremental_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
get_dataframes_and_queues,
)
from sklearnex.linear_model import IncrementalLinearRegression
from sklearnex.tests.utils import _IS_INTEL


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
Expand Down Expand Up @@ -129,7 +130,10 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data(
np_y_pred = _as_numpy(y_pred)

assert inclin.n_features_in_ == 2
tol = 7e-6 if dtype == np.float32 else 1e-7
tol = 1e-7
if dtype == np.float32:
tol = 7e-6 if _IS_INTEL else 2e-5

assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
if fit_intercept:
assert_allclose(inclin.intercept_, 3.0, atol=tol)
Expand Down
5 changes: 4 additions & 1 deletion sklearnex/linear_model/tests/test_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
_convert_to_dataframe,
get_dataframes_and_queues,
)
from sklearnex.tests.utils import _IS_INTEL


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
Expand Down Expand Up @@ -56,7 +57,9 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
assert "sklearnex" in linreg.__module__
assert linreg.n_features_in_ == 2

tol = 1e-5 if _as_numpy(linreg.coef_).dtype == np.float32 else 1e-7
tol = 1e-7
if _as_numpy(linreg.coef_).dtype == np.float32:
tol = 1e-5 if _IS_INTEL else 2e-5
assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)

Expand Down
45 changes: 42 additions & 3 deletions sklearnex/tests/test_run_to_run_stability.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
)
from sklearnex.svm import SVC
from sklearnex.tests.utils import (
_IS_INTEL,
PATCHED_MODELS,
SPECIAL_INSTANCES,
call_method,
Expand Down Expand Up @@ -154,6 +155,14 @@ def test_standard_estimator_stability(estimator, method, dataframe, queue):
pytest.skip(f"variation observed in {estimator}.score")
if estimator in ["IncrementalEmpiricalCovariance"] and method == "mahalanobis":
pytest.skip("allowed fallback to sklearn occurs")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")

if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
Expand Down Expand Up @@ -182,6 +191,14 @@ def test_special_estimator_stability(estimator, method, dataframe, queue):
pytest.skip(f"variation observed in KMeans.score")
if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")

est = SPECIAL_INSTANCES[estimator]

Expand All @@ -200,11 +217,25 @@ def test_special_estimator_stability(estimator, method, dataframe, queue):
@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api"))
@pytest.mark.parametrize("estimator, method", gen_models_info(SPARSE_INSTANCES))
def test_sparse_estimator_stability(estimator, method, dataframe, queue):
if "KMeans" in estimator and method == "score" and queue == None:
pytest.skip(f"variation observed in KMeans.score")

if "KMeans" in estimator and method in "score" and queue == None:
pytest.skip(f"variation observed in KMeans.{method}")
if (
not daal_check_version((2025, "P", 0))
and "KMeans()" in estimator
and queue == None
):
pytest.skip(f"variation observed in KMeans.{method} in 2024.7 oneDAL")
if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")

est = SPARSE_INSTANCES[estimator]

if method and not hasattr(est, method):
Expand All @@ -228,6 +259,14 @@ def test_other_estimator_stability(estimator, method, dataframe, queue):
pytest.skip(f"variation observed in KMeans.score")
if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")

est = STABILITY_INSTANCES[estimator]

Expand Down
3 changes: 3 additions & 0 deletions sklearnex/tests/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
SPECIAL_INSTANCES,
UNPATCHED_FUNCTIONS,
UNPATCHED_MODELS,
_get_processor_info,
call_method,
gen_dataset,
gen_models_info,
Expand All @@ -39,3 +40,5 @@
"gen_dataset",
"sklearn_clone_dict",
]

_IS_INTEL = "GenuineIntel" in _get_processor_info()
22 changes: 22 additions & 0 deletions sklearnex/tests/utils/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# limitations under the License.
# ==============================================================================

import platform
import subprocess
from functools import partial
from inspect import Parameter, getattr_static, isclass, signature

Expand Down Expand Up @@ -344,3 +346,23 @@ def gen_dataset(
np.uint32,
np.uint64,
]


def _get_processor_info():
proc = ""
if platform.system() == "Linux":
proc = (
subprocess.check_output(["/usr/bin/cat", "/proc/cpuinfo"])
.strip()
.decode("utf-8")
)
elif platform.system() == "Windows":
proc = platform.processor()
elif platform.system() == "Darwin":
proc = (
subprocess.check_output(["/usr/bin/sysctl", "-n", "machdep.cpu.brand_string"])
.strip()
.decode("utf-8")
)

return proc

0 comments on commit a2b9bf3

Please sign in to comment.