Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli authored Oct 1, 2024
2 parents 9d3cd89 + 3208718 commit de12203
Showing 7 changed files with 127 additions and 8 deletions.
17 changes: 12 additions & 5 deletions .github/Pull_Request_template.md
@@ -1,28 +1,35 @@
-### Description
+## Description

_Add a comprehensive description of proposed changes_

-_List issue number(s) if exist(s): #6 (for example)_
+_List associated issue number(s) if exist(s): #6 (for example)_

_Documentation PR (if needed): #1340 (for example)_

_Benchmarks PR (if needed): https://github.com/IntelPython/scikit-learn_bench/pull/155 (for example)_

---

-Checklist to comply with before moving PR from draft:
+Checklist to comply with **before moving PR from draft**:

**PR completeness and readability**

- [ ] I have reviewed my changes thoroughly before submitting this pull request.
- [ ] I have commented my code, particularly in hard-to-understand areas.
- [ ] I have updated the documentation to reflect the changes or created a separate PR with update and provided its number in the description, if necessary.
- [ ] Git commit message contains an appropriate signed-off-by string _(see [CONTRIBUTING.md](https://github.com/intel/scikit-learn-intelex/blob/main/CONTRIBUTING.md#pull-requests) for details)_.
- [ ] I have added a respective label(s) to PR if I have a permission for that.
- [ ] I have resolved any merge conflicts that might occur with the base branch.

**Testing**

- [ ] The unit tests pass successfully.
- [ ] I have run it locally and tested the changes extensively.
- [ ] All CI jobs are green or I have provided justification why they aren't.
- [ ] I have extended testing suite if new functionality was introduced in this PR.

**Performance**

- [ ] I have measured performance for affected algorithms using [scikit-learn_bench](https://github.com/IntelPython/scikit-learn_bench) and provided at least summary table with measured data, if performance change is expected.
- [ ] I have provided justification why performance has changed or why changes are not expected.
- [ ] I have provided justification why quality metrics have changed or why changes are not expected.
- [ ] I have extended benchmarking suite and provided corresponding scikit-learn_bench PR if new measurable functionality was introduced in this PR.
54 changes: 54 additions & 0 deletions .github/workflows/pr-checklist.yml
@@ -0,0 +1,54 @@
#===============================================================================
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#===============================================================================

name: Check PR Checklist

on:
  pull_request:
    types: [opened, edited, synchronize]

jobs:
  checklist:
    name: Close all checkboxes before moving from draft
    timeout-minutes: 5
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Get pull request details
        id: pr
        uses: actions/github-script@v7
        with:
          script: |
            const pr_desc = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.payload.pull_request.number
            });
            core.setOutput('body', pr_desc.data.body)
            core.setOutput('draft', pr_desc.data.draft)
      - name: Check if all checkboxes are checked
        id: checkboxes
        env:
          DESCRIPTION: ${{ steps.pr.outputs.body }}
        run: |
          UNCHECKED=$(echo "$DESCRIPTION" | grep -c '\[ \]' || true)
          echo "unchecked=$UNCHECKED" >> $GITHUB_OUTPUT
      - name: Fail if not all checkboxes are checked and PR is not draft
        if: ${{ (steps.pr.outputs.draft == 'false') && (steps.checkboxes.outputs.unchecked != '0') }}
        run: |
          echo "Unchecked checkboxes: ${{ steps.checkboxes.outputs.unchecked }}"
          exit 1
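For contributors who want to sanity-check a PR description before pushing, the snippet below is a minimal Python sketch of the same counting logic used by the grep step above (the sample_body text is made up for illustration). Like `grep -c '\[ \]'`, it counts the lines that still contain an unchecked box; a non-zero count on a non-draft PR makes the job fail.

# Minimal local sketch (not part of this commit) mirroring the workflow's check.
sample_body = """\
- [x] I have reviewed my changes thoroughly before submitting this pull request.
- [ ] I have commented my code, particularly in hard-to-understand areas.
- [ ] I have resolved any merge conflicts that might occur with the base branch.
"""

# Count lines that still contain an unchecked markdown checkbox "[ ]".
unchecked = sum("[ ]" in line for line in sample_body.splitlines())
print(f"Unchecked checkboxes: {unchecked}")  # prints 2 -> the CI job would fail for a non-draft PR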
13 changes: 13 additions & 0 deletions generator/wrapper_gen.py
@@ -1003,6 +1003,8 @@ def __cinit__(self):
# this is our actual algorithm class for Python
cdef class {{algo}}{{'('+iface[0]|lower+'__iface__)' if iface[0] else ''}}:
    cdef tuple _params
    '''
    {{algo}}
    {{params_all|fmt('{}', 'sphinx', sep='\n')|indent(4)}}
@@ -1017,6 +1019,17 @@ def __cinit__(self,
        self.c_ptr = mk_{{algo}}(
            {{params_all|fmt('{}', 'arg_cyext', sep=',\n')|indent(25+(algo|length))}}
        )
        current_locals = locals()
        ordered_input_args = '''
{{params_all|fmt('{}', 'name', sep=' ')|indent(0)}}
'''.strip().split()
        self._params = tuple(
            current_locals[arg]
            for arg in ordered_input_args
        )

    def __reduce__(self):
        return (self.__class__, self._params)
{% if not iface[0] %}
# the C++ manager__iface__ (de-templatized)
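The template change above gives every generated algorithm class pickling support: the constructor arguments are captured in self._params, and __reduce__ tells pickle to rebuild the object by calling the class with that tuple. As a rough illustration, here is a minimal pure-Python sketch of the same pattern (the Algo class and its parameters are hypothetical, not the generated Cython code):

import pickle


class Algo:
    """Minimal sketch of the pickling pattern used by the generated classes."""

    def __init__(self, fptype="double", method="defaultDense"):
        # Capture the constructor arguments in the order they were declared,
        # mirroring what the generated __cinit__ stores in self._params.
        self._params = (fptype, method)
        self.fptype = fptype
        self.method = method

    def __reduce__(self):
        # pickle rebuilds the object as Algo(*self._params) on load.
        return (self.__class__, self._params)


restored = pickle.loads(pickle.dumps(Algo(fptype="float")))
assert restored.fptype == "float"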
1 change: 0 additions & 1 deletion onedal/cluster/dbscan.cpp
@@ -149,7 +149,6 @@ ONEDAL_PY_INIT_MODULE(dbscan) {
ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list);
ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list);
#endif // ONEDAL_DATA_PARALLEL_SPMD

}

} // namespace oneapi::dal::python
1 change: 0 additions & 1 deletion onedal/covariance/covariance.cpp
@@ -189,7 +189,6 @@ ONEDAL_PY_INIT_MODULE(covariance) {
ONEDAL_PY_INSTANTIATE(init_compute_hyperparameters, sub, task::compute);
#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000
#endif

}

} // namespace oneapi::dal::python
1 change: 0 additions & 1 deletion onedal/linear_model/linear_model.cpp
@@ -318,7 +318,6 @@ ONEDAL_PY_INIT_MODULE(linear_model) {
ONEDAL_PY_INSTANTIATE(init_train_hyperparameters, sub, task_list);
#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000
#endif // ONEDAL_DATA_PARALLEL_SPMD

}

ONEDAL_PY_TYPE2STR(dal::linear_regression::task::regression, "regression");
48 changes: 48 additions & 0 deletions tests/test_daal4py_serialization.py
@@ -0,0 +1,48 @@
# ==============================================================================
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import pickle
import unittest

import numpy as np

import daal4py


class Test(unittest.TestCase):
    def test_serialization_of_qr(self):
        obj_original = daal4py.qr(fptype="float")
        obj_deserialized = pickle.loads(pickle.dumps(obj_original))

        rng = np.random.default_rng(seed=123)
        X = rng.standard_normal(size=(10, 5))

        Q_orig = obj_original.compute(X).matrixQ
        Q_deserialized = obj_deserialized.compute(X).matrixQ
        np.testing.assert_almost_equal(Q_orig, Q_deserialized)
        assert Q_orig.dtype == Q_deserialized.dtype

    def test_serialization_of_kmeans(self):
        obj_original = daal4py.kmeans_init(nClusters=4)
        obj_deserialized = pickle.loads(pickle.dumps(obj_original))

        rng = np.random.default_rng(seed=123)
        X = rng.standard_normal(size=(100, 20))

        np.testing.assert_almost_equal(
            obj_original.compute(X).centroids,
            obj_deserialized.compute(X).centroids,
        )
