Skip to content

Commit

Permalink
Merge pull request #60 from mideind/pyproject
Browse files Browse the repository at this point in the history
Migration to pyproject.toml
  • Loading branch information
sveinbjornt authored Sep 25, 2023
2 parents b0d86c3 + 5fbf2f1 commit 208dae8
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 136 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
python-version: [ "3.8", "3.9", "3.10", "3.11", "pypy-3.9", "pypy-3.10"]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install GreynirCorrect
Expand Down
63 changes: 62 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,71 @@
[project]
name = "reynir-correct"
version = "4.0.0"
description = "Spelling and grammar correction for Icelandic"
authors = [{ name = "Miðeind ehf", email = "[email protected]" }]
readme = { file = "README.rst", content-type = "text/x-rst" }
license = { file = "LICENSE.txt" }
# For classifier list see: https://pypi.org/pypi?%3Aaction=list_classifiers
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Operating System :: Unix",
    "Operating System :: POSIX",
    "Operating System :: Microsoft :: Windows",
    "Operating System :: MacOS",
    "Natural Language :: Icelandic",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: Implementation :: CPython",
    "Programming Language :: Python :: Implementation :: PyPy",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Utilities",
    "Topic :: Text Processing :: Linguistic",
]
requires-python = ">=3.8"
dependencies = ["reynir>=3.5.3", "icegrams>=1.1.2", "typing_extensions"]

[project.urls]
Repository = "https://github.com/mideind/GreynirCorrect"

[project.optional-dependencies]
# dev dependencies
dev = ["pytest"]
# sentence_classifier dependencies
sentence_classifier = ["transformers", "datasets", "torch"]

[project.scripts]
# 'correct' command line tool
correct = "reynir_correct.main:main"

# *** Configuration of tools ***

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
# NOTE(review): 'where' is not a recognized key in the package-data table —
# setuptools expects package names mapped to lists of glob patterns here
# (e.g. "*" = ["*.conf"]). Confirm what data files were meant to be included.
where = ["src"]

[tool.pytest.ini_options]
filterwarnings = [
    # Ignore deprecation warnings in libraries, their problem not ours
    "ignore::DeprecationWarning",
]

[tool.ruff]
line-length = 120

[tool.black]
line-length = 120

[tool.isort]
# This forces these imports to be placed at the top
# (duplicate key removed: TOML forbids defining the same key twice)
known_future_library = ["__future__", "typing", "typing_extensions"]
profile = "black"
line_length = 120
120 changes: 0 additions & 120 deletions setup.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/reynir_correct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@
from .errtokenizer import Correct_TOK, CorrectionPipeline, CorrectToken
from .readability import FleschKincaidFeedback, FleschKincaidScorer, RareWordsFinder
from .settings import Settings
from .version import __version__
from .wrappers import CorrectedSentence, CorrectionResult, GreynirCorrectAPI, ParseResultStats, check_errors

__author__ = "Miðeind ehf"
__copyright__ = "(C) 2023 Miðeind ehf."
__version__ = "4.0.0" # Remember to update in pyproject.toml as well

__all__ = (
"Greynir",
Expand Down
15 changes: 11 additions & 4 deletions src/reynir_correct/errtokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1841,7 +1841,8 @@ def add_ritmyndir_error(token: CorrectToken) -> CorrectToken:
return token

def get_details(code: str, txt: str, correct: str, lemma: str) -> Tuple[str, str, List[str]]:
"""Return short and detailed descriptions for the error category plus a link to grammar references where possible"""
"""Return short and detailed descriptions for the error category
plus a link to grammar references where possible"""
# text is the short version, about the category and the error.
# details is the long version with references.
try:
Expand Down Expand Up @@ -2497,7 +2498,12 @@ def number_error(token: CorrectToken, replace: str, code: str, instruction_txt:
suggest=correct,
)
)
if suppress_suggestions and token.error_code == "Z001" and isinstance(token.val, list) and any(v.ordfl == "lo" for v in token.val): # type: ignore
if (
suppress_suggestions
and token.error_code == "Z001"
and isinstance(token.val, list)
and any(v.ordfl == "lo" for v in token.val)
):
orig = token.original.strip() if token.original else token.txt
token.remove_error(orig)

Expand Down Expand Up @@ -2968,11 +2974,12 @@ def __init__(
self._generate_suggestion_list = options.pop("generate_suggestion_list", False)
# Skip spelling suggestions
self._suppress_suggestions = options.pop("suppress_suggestions", False)
# Only give suggestions, don't correct everything automatically. Currently only applies to lookup_unknown_words and check_wording.
# Only give suggestions, don't correct everything automatically.
# Currently only applies to lookup_unknown_words and check_wording.
self._suggest_not_correct = options.pop("suggest_not_correct", False)
# Wordlist for words that should not be marked as errors or corrected
self._ignore_wordlist = options.pop("ignore_wordlist", set())
self._ignore_rules = options.pop("ignore_rules", set())
self._ignore_rules = cast(frozenset, options.pop("ignore_rules", frozenset()))
self.settings = settings

def correct_tokens(self, stream: TokenIterator) -> TokenIterator:
Expand Down
12 changes: 9 additions & 3 deletions src/reynir_correct/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@
nargs="?",
type=str,
default="text",
help="Determine output format.\ntext: Corrected text only.\ncsv: One token per line in CSV format.\njson: One token per line in JSON format.\nm2: M2 format, GEC standard.",
help="""Determine output format.
text: Corrected text only.
csv: One token per line in CSV format.
json: One token per line in JSON format.
m2: M2 format, GEC standard.""",
)

# Determines whether we supply only token-level annotations or also sentence-level annotations
Expand Down Expand Up @@ -127,7 +131,8 @@

parser.add_argument(
"--sentence_prefilter",
help="Run a heuristic filter on sentences to determine whether they are probably correct. Probably correct sentences will not go through the full parsing process.",
help="""Run a heuristic filter on sentences to determine whether they are probably
correct. Probably correct sentences will not go through the full parsing process.""",
action="store_true",
)
parser.add_argument(
Expand All @@ -145,7 +150,8 @@
"--tov_config",
nargs=1,
type=str,
help="Add additional use-specific rules in a configuration file to check for custom tone-of-voice issues. Uses the same format as the default GreynirCorrect.conf file",
help="""Add additional use-specific rules in a configuration file to check for custom
tone-of-voice issues. Uses the same format as the default GreynirCorrect.conf file""",
default=None,
)

Expand Down
7 changes: 5 additions & 2 deletions src/reynir_correct/readability.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
This module implements the Flesch reading ease score for Icelandic text.
A high score indicates that the text is easy to read, while a low score
indicates that the text is difficult to read.
"""

from __future__ import annotations
Expand Down Expand Up @@ -190,7 +191,8 @@ class RareWordsFinder:
Rare words are defined as words which have a probability lower than the low_prob_cutoff.
The probability of a word is calculated by looking up the word in an n-gram model.
The class is designed to be used with the tokenizer module and maintains an internal state which needs to be reset manually.
The class is designed to be used with the tokenizer module and maintains an internal
state which needs to be reset manually.
"""

def __init__(self):
Expand All @@ -200,7 +202,8 @@ def __init__(self):
def get_rare_words_from_stream(
self, tok_stream: Iterable[tokenizer.Tok], max_words: int, low_prob_cutoff: float
) -> List[Tuple[str, float]]:
"""Tracks the probability of each word in a token stream. This is done by yielding the tokens in the token stream."""
"""Tracks the probability of each word in a token stream.
This is done by yielding the tokens in the token stream."""
rare_words_dict: Dict[str, float] = {}
for token in tok_stream:
# Only consider words, not punctuation, numbers, etc.
Expand Down
2 changes: 1 addition & 1 deletion src/reynir_correct/spelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def _case_of(text: str) -> Callable[[str], str]:
# We don't use .istitle() and .title() because
# they consider apostrophes to be word separators
return lambda s: s[0].upper() + s[1:]
return str
return str # noqa

def _cast(self, word: str) -> str:
"""Cast the word to lowercase and correct accents"""
Expand Down
1 change: 0 additions & 1 deletion src/reynir_correct/version.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/reynir_correct/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def _correct_spelling(
# TODO: The pipeline needs a refactoring.
# We use some hacks here to avoid having to rewrite the pipeline at this point.
self.gc.pipeline._text_or_gen = text
self.gc.pipeline._ignore_rules = ignore_rules or set()
self.gc.pipeline._ignore_rules = cast(frozenset, ignore_rules or frozenset())
self.gc.pipeline._suppress_suggestions = suppress_suggestions
return self.gc.pipeline.tokenize() # type: ignore

Expand Down

0 comments on commit 208dae8

Please sign in to comment.