Skip to content

Commit

Permalink
Merge pull request #60 from mideind/pyproject
Browse files Browse the repository at this point in the history
Migration to pyproject.toml
  • Loading branch information
sveinbjornt authored Sep 25, 2023
2 parents b0d86c3 + 5fbf2f1 commit 208dae8
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 136 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
python-version: [ "3.8", "3.9", "3.10", "3.11", "pypy-3.9", "pypy-3.10"]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install GreynirCorrect
Expand Down
63 changes: 62 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,71 @@
[project]
name = "reynir-correct"
version = "4.0.0"
description = "Spelling and grammar correction for Icelandic"
authors = [{ name = "Miðeind ehf", email = "[email protected]" }]
readme = { file = "README.rst", content-type = "text/x-rst" }
license = { file = "LICENSE.txt" }
# For classifier list see: https://pypi.org/pypi?%3Aaction=list_classifiers
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Operating System :: Unix",
    "Operating System :: POSIX",
    "Operating System :: Microsoft :: Windows",
    "Operating System :: MacOS",
    "Natural Language :: Icelandic",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: Implementation :: CPython",
    "Programming Language :: Python :: Implementation :: PyPy",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Utilities",
    "Topic :: Text Processing :: Linguistic",
]
requires-python = ">=3.8"
dependencies = ["reynir>=3.5.3", "icegrams>=1.1.2", "typing_extensions"]

[project.urls]
Repository = "https://github.com/mideind/GreynirCorrect"

[project.optional-dependencies]
# dev dependencies
dev = ["pytest"]
# sentence_classifier dependencies
sentence_classifier = ["transformers", "datasets", "torch"]

[project.scripts]
# 'correct' command line tool
correct = "reynir_correct.main:main"

# *** Configuration of tools ***

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
# NOTE(review): 'where' is not a recognized key in the package-data table —
# setuptools expects package names mapped to lists of glob patterns here
# (e.g. "*" = ["*.conf"]). Confirm what data files were meant to be included.
where = ["src"]

[tool.pytest.ini_options]
filterwarnings = [
    # Ignore deprecation warnings in libraries, their problem not ours
    "ignore::DeprecationWarning",
]

[tool.ruff]
line-length = 120

[tool.black]
line-length = 120

[tool.isort]
# This forces these imports to be placed at the top
# (duplicate key removed: TOML forbids defining the same key twice)
known_future_library = ["__future__", "typing", "typing_extensions"]
profile = "black"
line_length = 120
120 changes: 0 additions & 120 deletions setup.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/reynir_correct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@
from .errtokenizer import Correct_TOK, CorrectionPipeline, CorrectToken
from .readability import FleschKincaidFeedback, FleschKincaidScorer, RareWordsFinder
from .settings import Settings
from .version import __version__
from .wrappers import CorrectedSentence, CorrectionResult, GreynirCorrectAPI, ParseResultStats, check_errors

__author__ = "Miðeind ehf"
__copyright__ = "(C) 2023 Miðeind ehf."
__version__ = "4.0.0" # Remember to update in pyproject.toml as well

__all__ = (
"Greynir",
Expand Down
15 changes: 11 additions & 4 deletions src/reynir_correct/errtokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1841,7 +1841,8 @@ def add_ritmyndir_error(token: CorrectToken) -> CorrectToken:
return token

def get_details(code: str, txt: str, correct: str, lemma: str) -> Tuple[str, str, List[str]]:
"""Return short and detailed descriptions for the error category plus a link to grammar references where possible"""
"""Return short and detailed descriptions for the error category
plus a link to grammar references where possible"""
# text is the short version, about the category and the error.
# details is the long version with references.
try:
Expand Down Expand Up @@ -2497,7 +2498,12 @@ def number_error(token: CorrectToken, replace: str, code: str, instruction_txt:
suggest=correct,
)
)
if suppress_suggestions and token.error_code == "Z001" and isinstance(token.val, list) and any(v.ordfl == "lo" for v in token.val): # type: ignore
if (
suppress_suggestions
and token.error_code == "Z001"
and isinstance(token.val, list)
and any(v.ordfl == "lo" for v in token.val)
):
orig = token.original.strip() if token.original else token.txt
token.remove_error(orig)

Expand Down Expand Up @@ -2968,11 +2974,12 @@ def __init__(
self._generate_suggestion_list = options.pop("generate_suggestion_list", False)
# Skip spelling suggestions
self._suppress_suggestions = options.pop("suppress_suggestions", False)
# Only give suggestions, don't correct everything automatically. Currently only applies to lookup_unknown_words and check_wording.
# Only give suggestions, don't correct everything automatically.
# Currently only applies to lookup_unknown_words and check_wording.
self._suggest_not_correct = options.pop("suggest_not_correct", False)
# Wordlist for words that should not be marked as errors or corrected
self._ignore_wordlist = options.pop("ignore_wordlist", set())
self._ignore_rules = options.pop("ignore_rules", set())
self._ignore_rules = cast(frozenset, options.pop("ignore_rules", frozenset()))
self.settings = settings

def correct_tokens(self, stream: TokenIterator) -> TokenIterator:
Expand Down
12 changes: 9 additions & 3 deletions src/reynir_correct/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@
nargs="?",
type=str,
default="text",
help="Determine output format.\ntext: Corrected text only.\ncsv: One token per line in CSV format.\njson: One token per line in JSON format.\nm2: M2 format, GEC standard.",
help="""Determine output format.
text: Corrected text only.
csv: One token per line in CSV format.
json: One token per line in JSON format.
m2: M2 format, GEC standard.""",
)

# Determines whether we supply only token-level annotations or also sentence-level annotations
Expand Down Expand Up @@ -127,7 +131,8 @@

parser.add_argument(
"--sentence_prefilter",
help="Run a heuristic filter on sentences to determine whether they are probably correct. Probably correct sentences will not go through the full parsing process.",
help="""Run a heuristic filter on sentences to determine whether they are probably
correct. Probably correct sentences will not go through the full parsing process.""",
action="store_true",
)
parser.add_argument(
Expand All @@ -145,7 +150,8 @@
"--tov_config",
nargs=1,
type=str,
help="Add additional use-specific rules in a configuration file to check for custom tone-of-voice issues. Uses the same format as the default GreynirCorrect.conf file",
help="""Add additional use-specific rules in a configuration file to check for custom
tone-of-voice issues. Uses the same format as the default GreynirCorrect.conf file""",
default=None,
)

Expand Down
7 changes: 5 additions & 2 deletions src/reynir_correct/readability.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
This module implements the Flesch reading ease score for Icelandic text.
A high score indicates that the text is easy to read, while a low score
indicates that the text is difficult to read.
"""

from __future__ import annotations
Expand Down Expand Up @@ -190,7 +191,8 @@ class RareWordsFinder:
Rare words are defined as words which have a probability lower than the low_prob_cutoff.
The probability of a word is calculated by looking up the word in an n-gram model.
The class is designed to be used with the tokenizer module and maintains an internal state which needs to be reset manually.
The class is designed to be used with the tokenizer module and maintains an internal
state which needs to be reset manually.
"""

def __init__(self):
Expand All @@ -200,7 +202,8 @@ def __init__(self):
def get_rare_words_from_stream(
self, tok_stream: Iterable[tokenizer.Tok], max_words: int, low_prob_cutoff: float
) -> List[Tuple[str, float]]:
"""Tracks the probability of each word in a token stream. This is done by yielding the tokens in the token stream."""
"""Tracks the probability of each word in a token stream.
This is done by yielding the tokens in the token stream."""
rare_words_dict: Dict[str, float] = {}
for token in tok_stream:
# Only consider words, not punctuation, numbers, etc.
Expand Down
2 changes: 1 addition & 1 deletion src/reynir_correct/spelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def _case_of(text: str) -> Callable[[str], str]:
# We don't use .istitle() and .title() because
# they consider apostrophes to be word separators
return lambda s: s[0].upper() + s[1:]
return str
return str # noqa

def _cast(self, word: str) -> str:
"""Cast the word to lowercase and correct accents"""
Expand Down
1 change: 0 additions & 1 deletion src/reynir_correct/version.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/reynir_correct/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def _correct_spelling(
# TODO: The pipeline needs a refactoring.
# We use some hacks here to avoid having to rewrite the pipeline at this point.
self.gc.pipeline._text_or_gen = text
self.gc.pipeline._ignore_rules = ignore_rules or set()
self.gc.pipeline._ignore_rules = cast(frozenset, ignore_rules or frozenset())
self.gc.pipeline._suppress_suggestions = suppress_suggestions
return self.gc.pipeline.tokenize() # type: ignore

Expand Down

0 comments on commit 208dae8

Please sign in to comment.