Skip to content

Commit

Permalink
Add --ignore-multiline-regex option. (codespell-project#3476)
Browse files Browse the repository at this point in the history
  • Loading branch information
julian-smith-artifex-com authored Jul 8, 2024
1 parent d2707c3 commit f3d85db
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 6 deletions.
62 changes: 58 additions & 4 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
Pattern,
Sequence,
Set,
TextIO,
Tuple,
)

Expand Down Expand Up @@ -201,11 +202,17 @@ def __str__(self) -> str:


class FileOpener:
def __init__(self, use_chardet: bool, quiet_level: int) -> None:
def __init__(
self,
use_chardet: bool,
quiet_level: int,
ignore_multiline_regex: Optional[Pattern[str]],
) -> None:
self.use_chardet = use_chardet
if use_chardet:
self.init_chardet()
self.quiet_level = quiet_level
self.ignore_multiline_regex = ignore_multiline_regex

def init_chardet(self) -> None:
try:
Expand Down Expand Up @@ -247,7 +254,7 @@ def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
)
raise
else:
lines = f.readlines()
lines = self.get_lines(f)
f.close()

return lines, f.encoding
Expand All @@ -262,7 +269,7 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
print(f'WARNING: Trying next encoding "{encoding}"', file=sys.stderr)
with open(filename, encoding=encoding, newline="") as f:
try:
lines = f.readlines()
lines = self.get_lines(f)
except UnicodeDecodeError:
if not self.quiet_level & QuietLevels.ENCODING:
print(
Expand All @@ -279,6 +286,22 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:

return lines, encoding

def get_lines(self, f: TextIO) -> List[str]:
    """Return the lines of *f*, blanking regions matched by the ignore regex.

    When ``self.ignore_multiline_regex`` is unset, this is ``f.readlines()``.
    Otherwise the whole stream is read, and every match of the regex is
    replaced by as many newline characters as the match contained, so the
    line numbers of the remaining text are unchanged.

    In both cases each returned line keeps its line terminator, so the
    result has the same shape whether or not the option is in use.

    Args:
        f: Open text stream positioned at the start of the content.

    Returns:
        The lines of the (possibly blanked) text, terminators included.
    """
    if not self.ignore_multiline_regex:
        return f.readlines()

    # Function-scope import: the module's import block is not part of this
    # block, so the dependency is brought into scope here.
    import io

    text = f.read()
    pieces: List[str] = []
    pos = 0
    for m in self.ignore_multiline_regex.finditer(text):
        pieces.append(text[pos : m.start()])
        # Replace with blank lines so line numbers are unchanged.
        pieces.append("\n" * m.group().count("\n"))
        pos = m.end()
    pieces.append(text[pos:])

    # Split the way readlines() does on a stream opened with newline="":
    # break on "\n", "\r" and "\r\n", and keep the terminator on each line.
    # A plain split("\n") would drop the terminators and append a spurious
    # empty final element.
    return io.StringIO("".join(pieces), newline="").readlines()


# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-

Expand Down Expand Up @@ -411,6 +434,19 @@ def parse_options(
'e.g., "\\bmatch\\b". Defaults to '
"empty/disabled.",
)
parser.add_argument(
"--ignore-multiline-regex",
action="store",
type=str,
help="regular expression that is used to ignore "
"text that may span multi-line regions. "
"The regex is run with re.DOTALL. For example to "
"allow skipping of regions of Python code using "
"begin/end comments one could use: "
"--ignore-multiline-regex "
"'# codespell:ignore-begin *\\n.*# codespell:ignore-end *\\n'. "
"Defaults to empty/disabled.",
)
parser.add_argument(
"-I",
"--ignore-words",
Expand Down Expand Up @@ -1115,6 +1151,20 @@ def main(*args: str) -> int:
else:
ignore_word_regex = None

if options.ignore_multiline_regex:
try:
ignore_multiline_regex = re.compile(
options.ignore_multiline_regex, re.DOTALL
)
except re.error as e:
return _usage_error(
parser,
f"ERROR: invalid --ignore-multiline-regex "
f'"{options.ignore_multiline_regex}" ({e})',
)
else:
ignore_multiline_regex = None

ignore_words, ignore_words_cased = parse_ignore_words_option(
options.ignore_words_list
)
Expand Down Expand Up @@ -1203,7 +1253,11 @@ def main(*args: str) -> int:
for exclude_file in exclude_files:
build_exclude_hashes(exclude_file, exclude_lines)

file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level)
file_opener = FileOpener(
options.hard_encoding_detection,
options.quiet_level,
ignore_multiline_regex,
)

glob_match = GlobMatch(
flatten_clean_comma_separated_arguments(options.skip) if options.skip else []
Expand Down
37 changes: 37 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,43 @@ def test_ignore_regex_option(
assert cs.main(fname, r"--ignore-regex=\bdonn\b") == 1


def test_ignore_multiline_regex_option(
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
) -> None:
"""Test ignore multiline regex option functionality."""

# Invalid regex.
# An unbalanced "(" cannot be compiled, so main() must report a usage error.
result = cs.main("--ignore-multiline-regex=(", std=True)
assert isinstance(result, tuple)
code, stdout, _ = result
assert code == EX_USAGE
assert "usage:" in stdout

# The fixture contains "abandonned" four times: once in a URL, twice between
# the ignore-begin/ignore-end markers and once after the end marker.
fname = tmp_path / "flag.txt"
fname.write_text(
"""
Please see http://example.com/abandonned for info
# codespell:ignore-begin
'''
abandonned
abandonned
'''
# codespell:ignore-end
abandonned
"""
)
# Without the option, all four occurrences are counted.
assert cs.main(fname) == 4
# With a regex spanning the begin/end markers, the two occurrences inside
# the marked region are skipped, leaving two.
assert (
cs.main(
fname,
"--ignore-multiline-regex",
"codespell:ignore-begin.*codespell:ignore-end",
)
== 2
)


def test_uri_regex_option(
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,6 @@ max-complexity = 45
[tool.ruff.lint.pylint]
allow-magic-value-types = ["bytes", "int", "str",]
max-args = 13
max-branches = 46
max-returns = 11
max-branches = 47
max-returns = 12
max-statements = 119

0 comments on commit f3d85db

Please sign in to comment.