Skip to content

Commit

Permalink
Fix n_components choice and fix tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
olegkkruglov committed Aug 26, 2024
1 parent 9355c2e commit 091ad43
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 38 deletions.
8 changes: 4 additions & 4 deletions sklearnex/preview/decomposition/incremental_pca.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,7 +61,7 @@ def _onedal_fit_transform(self, X, queue=None):
return self._onedal_transform(X, queue)

def _onedal_partial_fit(self, X, check_input=True, queue=None):
first_pass = not hasattr(self, "components_")
first_pass = not hasattr(self, "_onedal_estimator")

if check_input:
if sklearn_check_version("1.0"):
Expand All @@ -78,10 +78,10 @@ def _onedal_partial_fit(self, X, check_input=True, queue=None):
n_samples, n_features = X.shape

if self.n_components is None:
if not hasattr(self, "components_"):
if not hasattr(self, "_components_shape"):
self.n_components_ = min(n_samples, n_features)
else:
self.n_components_ = self.components_.shape[0]
self._components_shape = self.n_components_

elif not self.n_components <= n_features:
raise ValueError(
"n_components=%r invalid for n_features=%d, need "
Expand Down
70 changes: 36 additions & 34 deletions sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,14 +19,14 @@
from numpy.testing import assert_allclose

from onedal.tests.utils._dataframes_support import (
_as_numpy,
_convert_to_dataframe,
get_dataframes_and_queues,
)
from sklearnex.tests._utils_spmd import (
_generate_statistic_data,
_get_local_tensor,
_mpi_libs_and_gpu_available,
_spmd_assert_allclose,
)


Expand Down Expand Up @@ -56,6 +56,7 @@ def test_incremental_pca_fit_spmd_gold(dataframe, queue, whiten, dtype):
[4.0, 16.0],
[5.0, 32.0],
[6.0, 64.0],
[7.0, 128.0],
]
).astype(dtype=dtype)
dpt_X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
Expand All @@ -72,7 +73,6 @@ def test_incremental_pca_fit_spmd_gold(dataframe, queue, whiten, dtype):
assert_allclose(incpca.components_, incpca_spmd.components_)
assert_allclose(incpca.singular_values_, incpca_spmd.singular_values_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.var_, incpca_spmd.var_)
assert_allclose(incpca.explained_variance_, incpca_spmd.explained_variance_)
assert_allclose(
Expand Down Expand Up @@ -109,36 +109,36 @@ def test_incremental_pca_partial_fit_spmd_gold(
[4.0, 16.0],
[5.0, 32.0],
[6.0, 64.0],
[7.0, 128.0],
[8.0, 0.0],
[9.0, 2.0],
[10.0, 4.0],
[11.0, 8.0],
[12.0, 16.0],
[13.0, 32.0],
[14.0, 64.0],
[15.0, 128.0],
]
).astype(dtype=dtype)
dpt_X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
X_split = np.array_split(X, num_blocks)
local_X = _get_local_tensor(X)
split_local_X = np.array_split(local_X, num_blocks)

y = np.dot(X, [1, 2]) + 3
dpt_y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
local_y = _get_local_tensor(y)
split_local_y = np.array_split(local_y, num_blocks)

incpca_spmd = IncrementalPCA_SPMD(whiten=whiten)
incpca = IncrementalPCA(whiten=whiten)

for i in range(num_blocks):
local_dpt_X = _convert_to_dataframe(
split_local_X[i], sycl_queue=queue, target_df=dataframe
)
local_dpt_y = _convert_to_dataframe(
split_local_y[i], sycl_queue=queue, target_df=dataframe
)
incpca_spmd.partial_fit(local_dpt_X, local_dpt_y)

incpca.fit(dpt_X, dpt_y)
dpt_X = _convert_to_dataframe(X_split[i], sycl_queue=queue, target_df=dataframe)
incpca.partial_fit(dpt_X)
incpca_spmd.partial_fit(local_dpt_X)

assert_allclose(incpca.n_components_, incpca_spmd.n_components_)
assert_allclose(incpca.components_, incpca_spmd.components_)
assert_allclose(incpca.singular_values_, incpca_spmd.singular_values_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.var_, incpca_spmd.var_)
assert_allclose(incpca.explained_variance_, incpca_spmd.explained_variance_)
assert_allclose(
Expand Down Expand Up @@ -167,6 +167,8 @@ def test_incremental_pca_fit_spmd_random(
from sklearnex.preview.decomposition import IncrementalPCA
from sklearnex.spmd.decomposition import IncrementalPCA as IncrementalPCA_SPMD

tol = 7e-5 if dtype == np.float32 else 1e-7

# Create data and process into dpt
X = _generate_statistic_data(num_samples, num_features, dtype)
dpt_X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
Expand All @@ -181,21 +183,20 @@ def test_incremental_pca_fit_spmd_random(
incpca_spmd.fit(local_dpt_X)
incpca.fit(dpt_X)

assert_allclose(incpca.n_components_, incpca_spmd.n_components_)
assert_allclose(incpca.components_, incpca_spmd.components_)
assert_allclose(incpca.singular_values_, incpca_spmd.singular_values_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.var_, incpca_spmd.var_)
assert_allclose(incpca.explained_variance_, incpca_spmd.explained_variance_)
assert_allclose(incpca.n_components_, incpca_spmd.n_components_, atol=tol)
assert_allclose(incpca.components_, incpca_spmd.components_, atol=tol)
assert_allclose(incpca.singular_values_, incpca_spmd.singular_values_, atol=tol)
assert_allclose(incpca.mean_, incpca_spmd.mean_, atol=tol)
assert_allclose(incpca.var_, incpca_spmd.var_, atol=tol)
assert_allclose(incpca.explained_variance_, incpca_spmd.explained_variance_, atol=tol)
assert_allclose(
incpca.explained_variance_ratio_, incpca_spmd.explained_variance_ratio_
incpca.explained_variance_ratio_, incpca_spmd.explained_variance_ratio_, atol=tol
)

y_trans_spmd = incpca_spmd.transform(dpt_X_test)
y_trans = incpca.transform(dpt_X_test)

_spmd_assert_allclose(y_trans_spmd, y_trans)
assert_allclose(_as_numpy(y_trans_spmd), _as_numpy(y_trans), atol=tol)


@pytest.mark.skipif(
Expand All @@ -209,7 +210,7 @@ def test_incremental_pca_fit_spmd_random(
@pytest.mark.parametrize("whiten", [True, False])
@pytest.mark.parametrize("n_components", [None, 2, 5])
@pytest.mark.parametrize("num_blocks", [1, 2])
@pytest.mark.parametrize("num_samples", [100, 200])
@pytest.mark.parametrize("num_samples", [200, 400])
@pytest.mark.parametrize("num_features", [10, 20])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
Expand All @@ -227,6 +228,8 @@ def test_incremental_pca_partial_fit_spmd_random(
from sklearnex.preview.decomposition import IncrementalPCA
from sklearnex.spmd.decomposition import IncrementalPCA as IncrementalPCA_SPMD

tol = 3e-4 if dtype == np.float32 else 1e-7

# Create data and process into dpt
X = _generate_statistic_data(num_samples, num_features, dtype)
dpt_X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
Expand All @@ -247,18 +250,17 @@ def test_incremental_pca_partial_fit_spmd_random(
incpca_spmd.partial_fit(local_dpt_X)
incpca.partial_fit(dpt_X)

assert_allclose(incpca.n_components_, incpca_spmd.n_components_)
assert_allclose(incpca.components_, incpca_spmd.components_)
assert_allclose(incpca.singular_values_, incpca_spmd.singular_values_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.mean_, incpca_spmd.mean_)
assert_allclose(incpca.var_, incpca_spmd.var_)
assert_allclose(incpca.explained_variance_, incpca_spmd.explained_variance_)
assert_allclose(incpca.n_components_, incpca_spmd.n_components_, atol=tol)
assert_allclose(incpca.components_, incpca_spmd.components_, atol=tol)
assert_allclose(incpca.singular_values_, incpca_spmd.singular_values_, atol=tol)
assert_allclose(incpca.mean_, incpca_spmd.mean_, atol=tol)
assert_allclose(incpca.var_, incpca_spmd.var_, atol=tol)
assert_allclose(incpca.explained_variance_, incpca_spmd.explained_variance_, atol=tol)
assert_allclose(
incpca.explained_variance_ratio_, incpca_spmd.explained_variance_ratio_
incpca.explained_variance_ratio_, incpca_spmd.explained_variance_ratio_, atol=tol
)

y_trans_spmd = incpca_spmd.transform(dpt_X_test)
y_trans = incpca.transform(dpt_X_test)

_spmd_assert_allclose(y_trans_spmd, y_trans)
assert_allclose(_as_numpy(y_trans_spmd), _as_numpy(y_trans), atol=tol)

0 comments on commit 091ad43

Please sign in to comment.