Enhancement/partial match #82

Open · wants to merge 10 commits into base: enhancement/partialMatch
1 change: 1 addition & 0 deletions .github/FUNDING.yml
@@ -0,0 +1 @@
github: Hironsan
26 changes: 26 additions & 0 deletions .github/workflows/pip.yml
@@ -0,0 +1,26 @@
name: test package installation

on:
  schedule:
    - cron: "0 0 * * *"

jobs:
  build:
    if: contains(github.event.head_commit.message, '[skip ci]') == false
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: [3.6, 3.7, 3.8]
        os: [ubuntu-latest, macos-latest]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -U setuptools
      - run: pip install seqeval
1 change: 1 addition & 0 deletions Pipfile
@@ -9,6 +9,7 @@ autopep8 = "*"
flake8 = "*"
pytest-cov = "*"
isort = "*"
atomicwrites = "*"

[packages]
numpy = "*"
352 changes: 201 additions & 151 deletions Pipfile.lock

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions README.md
@@ -70,6 +70,21 @@ In strict mode, the inputs are evaluated according to the specified schema. The
weighted avg       0.50      0.50      0.50         2
```

With partial match, the inputs are evaluated by the number of matching tags rather than by exact entity spans. It is not compatible with strict mode.
```python
print(classification_report(y_true, y_pred, partial_match=True))

              precision    recall  f1-score   support

        MISC       0.75      1.00      0.86         3
         PER       1.00      1.00      1.00         2

   micro avg       0.83      1.00      0.91         5
   macro avg       0.88      1.00      0.93         5
weighted avg       0.85      1.00      0.91         5
```
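These numbers come from tag-level counts. A minimal, self-contained sketch, assuming the same `y_true`/`y_pred` defined earlier in this README:

```python
from seqeval.metrics import classification_report

# Assumed inputs: the y_true/y_pred used in the examples above.
y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]

# MISC: 4 predicted tags overlap the 3 true tags at 3 positions,
# so precision = 3/4 = 0.75 and recall = 3/3 = 1.00.
print(classification_report(y_true, y_pred, partial_match=True))
```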


A minimal case to explain the differences between the default and strict modes:

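The example block itself is collapsed in this diff view. A minimal sketch of such a case, assuming IOB2-style inputs:

```python
from seqeval.metrics import f1_score
from seqeval.scheme import IOB2

y_true = [['B-NP', 'I-NP', 'O']]
y_pred = [['I-NP', 'I-NP', 'O']]  # chunk starts with I-NP instead of B-NP

# Default mode accepts the I-NP run as a chunk -> perfect match.
print(f1_score(y_true, y_pred))                              # 1.0

# Strict IOB2 requires chunks to start with B-, so the prediction
# contains no valid chunk at all.
print(f1_score(y_true, y_pred, mode='strict', scheme=IOB2))  # 0.0
```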
72 changes: 58 additions & 14 deletions seqeval/metrics/sequence_labeling.py
@@ -27,7 +27,8 @@ def precision_recall_fscore_support(y_true: List[List[str]],
                                    beta: float = 1.0,
                                    sample_weight: Optional[List[int]] = None,
                                    zero_division: str = 'warn',
                                    suffix: bool = False) -> SCORES:
                                    suffix: bool = False,
                                    partial_match: bool = False) -> SCORES:
"""Compute precision, recall, F-measure and support for each class.

Args:
Expand Down Expand Up @@ -70,6 +71,8 @@ def precision_recall_fscore_support(y_true: List[List[str]],

suffix : bool, False by default.

partial_match : bool, False by default.

Returns:
precision : float (if average is not None) or array of float, shape = [n_unique_labels]

@@ -121,9 +124,32 @@ def extract_tp_actual_correct(y_true, y_pred, suffix, *args):
        for type_name in target_names:
            entities_true_type = entities_true.get(type_name, set())
            entities_pred_type = entities_pred.get(type_name, set())
            tp_sum = np.append(tp_sum, len(entities_true_type & entities_pred_type))
            pred_sum = np.append(pred_sum, len(entities_pred_type))
            true_sum = np.append(true_sum, len(entities_true_type))
            if partial_match:
                # Project the true and predicted entities of this type onto
                # boolean tag vectors, then score by position-wise overlap.
                n_sublist = len(y_true)
                vector_size = 0
                if entities_true_type:
                    # Entities of one type do not overlap, so the
                    # lexicographically largest one also ends last.
                    vector_size = max(entities_true_type)[1]
                if entities_pred_type:
                    vector_size = max(max(entities_pred_type)[1], vector_size)

                # Pad so the inclusive end offsets always fit.
                vector_size += n_sublist
                entities_true_vector = np.zeros(vector_size, dtype=bool)
                # fill true values
                for start, end in entities_true_type:
                    entities_true_vector[start:end + 1] = True
                # fill predicted values
                entities_pred_vector = np.zeros(vector_size, dtype=bool)
                for start, end in entities_pred_type:
                    entities_pred_vector[start:end + 1] = True

                # True positives: positions tagged with this type in both.
                tp_sum = np.append(tp_sum, (entities_true_vector & entities_pred_vector).sum())
                pred_sum = np.append(pred_sum, entities_pred_vector.sum())
                true_sum = np.append(true_sum, entities_true_vector.sum())

            else:
                tp_sum = np.append(tp_sum, len(entities_true_type & entities_pred_type))
                pred_sum = np.append(pred_sum, len(entities_pred_type))
                true_sum = np.append(true_sum, len(entities_true_type))

        return pred_sum, tp_sum, true_sum
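For illustration (not part of the diff), the overlap counting above reduces to the following standalone sketch with hypothetical offsets:

```python
import numpy as np

# Hypothetical (start, end) token offsets, inclusive, for one entity type.
entities_true_type = {(3, 5)}  # true entity covers tokens 3..5
entities_pred_type = {(2, 5)}  # prediction starts one token early

size = 8  # anything past the largest end offset works
true_vec = np.zeros(size, dtype=bool)
pred_vec = np.zeros(size, dtype=bool)
for start, end in entities_true_type:
    true_vec[start:end + 1] = True
for start, end in entities_pred_type:
    pred_vec[start:end + 1] = True

tp = (true_vec & pred_vec).sum()
print(tp, pred_vec.sum(), true_vec.sum())  # 3 4 3 -> precision 0.75, recall 1.00
```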

@@ -281,7 +307,8 @@ def f1_score(y_true: List[List[str]], y_pred: List[List[str]],
             mode: Optional[str] = None,
             sample_weight: Optional[List[int]] = None,
             zero_division: str = 'warn',
             scheme: Optional[Type[Token]] = None):
             scheme: Optional[Type[Token]] = None,
             partial_match: bool = False):
"""Compute the F1 score.

The F1 score can be interpreted as a weighted average of the precision and
Expand Down Expand Up @@ -330,6 +357,8 @@ def f1_score(y_true: List[List[str]], y_pred: List[List[str]],

suffix : bool, False by default.

partial_match : bool, False by default.

Returns:
score : float or array of float, shape = [n_unique_labels].

@@ -354,15 +383,17 @@ def f1_score(y_true: List[List[str]], y_pred: List[List[str]],
                                                     sample_weight=sample_weight,
                                                     zero_division=zero_division,
                                                     scheme=scheme,
                                                     suffix=suffix)
                                                     suffix=suffix
                                                     )
    else:
        _, _, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                     average=average,
                                                     warn_for=('f-score',),
                                                     beta=1,
                                                     sample_weight=sample_weight,
                                                     zero_division=zero_division,
                                                     suffix=suffix)
                                                     suffix=suffix,
                                                     partial_match=partial_match)
    return f


@@ -406,7 +437,8 @@ def precision_score(y_true: List[List[str]], y_pred: List[List[str]],
                    mode: Optional[str] = None,
                    sample_weight: Optional[List[int]] = None,
                    zero_division: str = 'warn',
                    scheme: Optional[Type[Token]] = None):
                    scheme: Optional[Type[Token]] = None,
                    partial_match: bool = False):
"""Compute the precision.

The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
Expand Down Expand Up @@ -454,6 +486,8 @@ def precision_score(y_true: List[List[str]], y_pred: List[List[str]],

suffix : bool, False by default.

partial_match : bool, False by default.

Returns:
score : float or array of float, shape = [n_unique_labels].

@@ -484,7 +518,8 @@ def precision_score(y_true: List[List[str]], y_pred: List[List[str]],
                                                     warn_for=('precision',),
                                                     sample_weight=sample_weight,
                                                     zero_division=zero_division,
                                                     suffix=suffix)
                                                     suffix=suffix,
                                                     partial_match=partial_match)
    return p


@@ -495,7 +530,8 @@ def recall_score(y_true: List[List[str]], y_pred: List[List[str]],
                 mode: Optional[str] = None,
                 sample_weight: Optional[List[int]] = None,
                 zero_division: str = 'warn',
                 scheme: Optional[Type[Token]] = None):
                 scheme: Optional[Type[Token]] = None,
                 partial_match: bool = False):
"""Compute the recall.

The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
Expand Down Expand Up @@ -543,6 +579,8 @@ def recall_score(y_true: List[List[str]], y_pred: List[List[str]],

suffix : bool, False by default.

partial_match : bool, False by default.

Returns:
score : float.

@@ -573,7 +611,8 @@ def recall_score(y_true: List[List[str]], y_pred: List[List[str]],
                                                     warn_for=('recall',),
                                                     sample_weight=sample_weight,
                                                     zero_division=zero_division,
                                                     suffix=suffix)
                                                     suffix=suffix,
                                                     partial_match=partial_match)
    return r


@@ -617,7 +656,8 @@ def classification_report(y_true, y_pred,
                          mode=None,
                          sample_weight=None,
                          zero_division='warn',
                          scheme=None):
                          scheme=None,
                          partial_match: bool = False):
"""Build a text report showing the main classification metrics.

Args:
Expand Down Expand Up @@ -648,6 +688,8 @@ def classification_report(y_true, y_pred,

suffix : bool, False by default.

partial_match : bool, False by default.

Returns:
report : string/dict. Summary of the precision, recall, F1 score for each class.

@@ -694,7 +736,8 @@ def classification_report(y_true, y_pred,
            average=None,
            sample_weight=sample_weight,
            zero_division=zero_division,
            suffix=suffix
            suffix=suffix,
            partial_match=partial_match,
        )
        for row in zip(target_names, p, r, f1, s):
            reporter.write(*row)
@@ -708,7 +751,8 @@ def classification_report(y_true, y_pred,
            average=average,
            sample_weight=sample_weight,
            zero_division=zero_division,
            suffix=suffix
            suffix=suffix,
            partial_match=partial_match
        )
        reporter.write('{} avg'.format(average), avg_p, avg_r, avg_f1, support)
        reporter.write_blank()
10 changes: 3 additions & 7 deletions setup.py
@@ -22,7 +22,7 @@
    os.system('python setup.py sdist bdist_wheel upload')
    sys.exit()

required = ['numpy==1.19.2', 'scikit-learn==0.23.2']
required = ['numpy>=1.14.0', 'scikit-learn>=0.21.3']

setup(
    name=NAME,
@@ -45,13 +45,9 @@
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
6 changes: 6 additions & 0 deletions tests/test_metrics.py
@@ -123,9 +123,15 @@ def test_performance_measure(self):
    def test_classification_report(self):
        print(classification_report(self.y_true, self.y_pred))

    def test_classification_report_partial_match(self):
        print(classification_report(self.y_true, self.y_pred, partial_match=True))

    def test_inv_classification_report(self):
        print(classification_report(self.y_true_inv, self.y_pred_inv, suffix=True))

    def test_inv_classification_report_partial_match(self):
        print(classification_report(self.y_true_inv, self.y_pred_inv, suffix=True, partial_match=True))

    def test_by_ground_truth(self):
        with open(self.file_name) as f:
            output = subprocess.check_output(['perl', 'conlleval.pl'], stdin=f).decode('utf-8')