-
Notifications
You must be signed in to change notification settings - Fork 179
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: SPMD interface for IncrementalPCA (#1979)
- Loading branch information
1 parent
d9f46b7
commit 9f63db2
Showing 8 changed files with 458 additions and 27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
# ============================================================================== | ||
# Copyright 2024 Intel Corporation | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
from daal4py.sklearn._utils import get_dtype | ||
|
||
from ...datatypes import _convert_to_supported, from_table, to_table | ||
from ...decomposition import IncrementalPCA as base_IncrementalPCA | ||
from ...utils import _check_array | ||
from .._base import BaseEstimatorSPMD | ||
|
||
|
||
class IncrementalPCA(BaseEstimatorSPMD, base_IncrementalPCA):
    """
    SPMD (distributed) flavour of the oneDAL-based incremental PCA estimator.

    Supports distributed PCA computation when the data arrives in batches.
    The API is the same as for `onedal.decomposition.IncrementalPCA`.
    """

    def _reset(self):
        """Install a fresh partial result and drop any fitted ``components_``."""
        # Attribute lookup starts *after* ``base_IncrementalPCA`` in the MRO.
        # NOTE(review): presumably this selects the non-SPMD backend for the
        # per-batch accumulation - confirm against ``_base``.
        parent = super(base_IncrementalPCA, self)
        self._partial_result = parent._get_backend(
            "decomposition", "dim_reduction", "partial_train_result"
        )
        if hasattr(self, "components_"):
            del self.components_

    def partial_fit(self, X, y=None, queue=None):
        """Fold one batch ``X`` into the accumulated partial result.

        The whole of ``X`` is treated as a single batch.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.
        y : Ignored
            Not used, present for API consistency by convention.
        queue : optional
            Forwarded to ``_get_policy`` and remembered on ``self._queue``.
            Presumably a SYCL queue - confirm against callers.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = _check_array(X)
        batch_rows, batch_cols = X.shape

        # The presence of ``components_`` marks every call after the first one.
        if hasattr(self, "components_"):
            self.n_samples_seen_ += batch_rows
        else:
            self.components_ = None
            self.n_samples_seen_ = batch_rows
            self.n_features_in_ = batch_cols

        # An explicit ``n_components`` wins; otherwise derive it from the
        # already known components or, failing that, from the batch shape.
        if self.n_components is not None:
            self.n_components_ = self.n_components
        elif self.components_ is None:
            self.n_components_ = min(batch_rows, batch_cols)
        else:
            self.n_components_ = self.components_.shape[0]

        self._queue = queue

        # Lookup starts after ``base_IncrementalPCA`` in the MRO (see _reset).
        parent = super(base_IncrementalPCA, self)
        policy = parent._get_policy(queue, X)
        X = _convert_to_supported(policy, X)

        # dtype and oneDAL parameters are captured once, from the first batch
        # seen by this instance, and reused for every later batch.
        if not hasattr(self, "_dtype"):
            self._dtype = get_dtype(X)
            self._params = self._get_onedal_params(X)

        batch_table = to_table(X)
        self._partial_result = parent._get_backend(
            "decomposition",
            "dim_reduction",
            "partial_train",
            policy,
            self._params,
            self._partial_result,
            batch_table,
        )
        return self

    def _create_model(self):
        """Build a backend model from the fitted attributes and cache it.

        The model receives ``components_`` as eigenvectors and ``mean_`` as
        means; ``explained_variance_`` is attached as eigenvalues only when
        ``whiten`` is truthy. The model is also stored on
        ``self._onedal_model``.
        """
        model = super(base_IncrementalPCA, self)._get_backend(
            "decomposition", "dim_reduction", "model"
        )
        model.eigenvectors = to_table(self.components_)
        model.means = to_table(self.mean_)
        if self.whiten:
            model.eigenvalues = to_table(self.explained_variance_)
        self._onedal_model = model
        return model

    def predict(self, X, queue=None):
        """Run backend inference on ``X`` and return the transformed data.

        Parameters
        ----------
        X : array-like
            Data handed to the backend ``infer`` routine.
        queue : optional
            Forwarded to ``_get_policy``.

        Returns
        -------
        The ``transformed_data`` of the inference result, converted with
        ``from_table``.
        """
        # Lookup starts after ``base_IncrementalPCA`` in the MRO (see _reset).
        parent = super(base_IncrementalPCA, self)
        policy = parent._get_policy(queue, X)
        fitted_model = self._create_model()
        X = _convert_to_supported(policy, X)
        infer_params = self._get_onedal_params(X, stage="predict")

        inference = parent._get_backend(
            "decomposition",
            "dim_reduction",
            "infer",
            policy,
            infer_params,
            fitted_model,
            to_table(X),
        )
        return from_table(inference.transformed_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# ============================================================================== | ||
# Copyright 2024 Intel Corporation | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
from onedal.spmd.decomposition import IncrementalPCA as onedalSPMD_IncrementalPCA | ||
|
||
from ...preview.decomposition import IncrementalPCA as base_IncrementalPCA | ||
|
||
|
||
class IncrementalPCA(base_IncrementalPCA):
    """
    SPMD (distributed) incremental PCA estimator built on the sklearnex one.

    Supports distributed PCA computation when the data arrives in batches.
    The API is the same as for `sklearnex.decomposition.IncrementalPCA`.
    """

    # Backend estimator class; wrapped in ``staticmethod`` so that accessing
    # it through an instance does not bind ``self`` to it.
    _onedal_incremental_pca = staticmethod(onedalSPMD_IncrementalPCA)
Oops, something went wrong.