Merge pull request #14 from yu9824/dev

v2.1.6
yu9824 · Oct 21, 2023 · 358f4bd · 358f4bd
2 parents b04d11f + 512889d
commit 358f4bd
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@
 This is an algorithm for evenly partitioning data in a `scikit-learn`-like interface.
 (See [References](#References) for details of the algorithm.)
 
-![simulateion_gif](https://github.com/yu9824/kennard_stone/blob/main/example/simulate.gif?raw=true "Simulateion")
+![simulation_gif](https://github.com/yu9824/kennard_stone/blob/main/example/simulate.gif?raw=true "simulation_gif")
 
 ## How to install
 
@@ -35,7 +35,7 @@ conda install -c conda-forge kennard-stone
 
 The project site is [here](https://anaconda.org/conda-forge/kennard-stone).
 
-You need `numpy` and `scikit-learn` to run.
+You need `numpy>=1.20` and `scikit-learn` to run.
 
 ## How to use
 
@@ -178,6 +178,13 @@ X_train, X_test, y_train, y_test = train_test_split(
 )
 ```
 
+Parallelization is applied only when calculating the distance matrix,
+so it does not conflict with running something like `cross_validate` in parallel when using `KFold`.
+
+```python
+# OK: these do not conflict with each other
+cross_validate(estimator, X, y, cv=KFold(5, n_jobs=-1), n_jobs=-1)
+```
 
 ## LICENSE
 
@@ -199,7 +206,7 @@ Copyright (c) 2021 yu9824
 
 ## Histories
 
-### v2.0.0
+### v2.0.0 (deprecated)
 
 - Define Extended Kennard-Stone algorithm (multi-class) i.e. Improve KFold algorithm.
 - Delete `alternate` argument in `KFold`.
@@ -209,24 +216,24 @@ Copyright (c) 2021 yu9824
 
 - Fix bug with Python3.7.
 
-### v2.1.0
+### v2.1.0 (deprecated)
 
 - Optimize algorithm
 - Handle large amounts of data.
   - parallel calculation when calculating distance (Add `n_jobs` argument)
   - replacing recursive functions with for-loops
 - Add distance metrics other than "euclidean" (Add `metric` argument)
 
-### v2.1.1
+### v2.1.1 (deprecated)
 
 - Fix bug when `metric="nan_euclidean"`.
 
-### v2.1.2
+### v2.1.2 (deprecated)
 
 - Fix details.
   - Update docstrings and typings.
 
-### v2.1.3
+### v2.1.3 (deprecated)
 
 - Fix details.
   - Update some typings. (You have access to a list of strings that can be used in the metric.)
@@ -240,3 +247,8 @@ Copyright (c) 2021 yu9824
 ### v2.1.5
 
 - Delete "kulsinski" metric to support scipy>=1.11
+
+### v2.1.6
+
+- Improve typing in `kennard_stone.train_test_split`
+- Add some docstrings.
diff --git a/kennard_stone/__init__.py b/kennard_stone/__init__.py
@@ -1,6 +1,13 @@
+"""
+This is an algorithm for evenly partitioning data in a `scikit-learn`-like
+interface.
+
+Copyright © 2021 yu9824
+"""
+
 from .kennard_stone import KFold, train_test_split
 
-__version__ = "2.1.5"
+__version__ = "2.1.6"
 __license__ = "MIT"
 __author__ = "yu9824"
 __copyright__ = "Copyright © 2021 yu9824"

diff --git a/kennard_stone/_deprecated.py b/kennard_stone/_deprecated.py
@@ -1,3 +1,15 @@
+"""
+This program was used in v1. It is implemented in a foolproof manner and is
+left for checking the calculation results.
+
+We do not recommend using it: it is very slow because no parallel
+computation is implemented, and it only implements the Kennard-Stone
+method with Euclidean distance.
+
+Copyright © 2021 yu9824
+"""
+
+
 import numpy as np
 
 from sklearn.utils import check_array

diff --git a/kennard_stone/kennard_stone.py b/kennard_stone/kennard_stone.py
@@ -2,10 +2,10 @@
 Copyright © 2021 yu9824
 """
 
-from typing import overload, Union, Optional, Generator, Callable
+from typing import overload, Union, Optional, TypeVar
 
 # The following have been deprecated in Python >= 3.9
-from typing import List, Set
+from typing import List, Set, Generator, Callable
 
 import sys
 import pkgutil
@@ -25,6 +25,9 @@
 from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.utils import check_array
 
+# for typing
+T = TypeVar("T")
+
 
 class IgnoredArgumentWarning(Warning):
     """Warning used to ignore an argument."""
@@ -214,19 +217,19 @@ def _iter_indices(self, X, y=None, groups=None):
 
 @overload
 def train_test_split(
-    *arrays,
+    *arrays: T,
     test_size: Optional[Union[float, int]] = None,
     train_size: Optional[Union[float, int]] = None,
     metric: Union[
         METRCIS, Callable[[ArrayLike, ArrayLike], np.ndarray]
     ] = "euclidean",
     n_jobs: Optional[int] = None,
-) -> list:
-    pass
+) -> List[T]:
+    ...
 
 
 def train_test_split(
-    *arrays,
+    *arrays: T,
     test_size: Optional[Union[float, int]] = None,
     train_size: Optional[Union[float, int]] = None,
     metric: Union[
@@ -235,7 +238,7 @@ def train_test_split(
     n_jobs: Optional[int] = None,
     random_state: None = None,
     shuffle: None = None,
-) -> list:
+) -> List[T]:
     """Split arrays or matrices into train and test subsets using the
     Kennard-Stone algorithm.