From 1e467e1d6a0da391a73520ac06310096f971f422 Mon Sep 17 00:00:00 2001 From: Gibran Date: Mon, 28 Jun 2021 19:32:38 -0500 Subject: [PATCH 1/5] 1948 Add Specifying Encoder Rule --- CHANGELOG.md | 8 +- docs/conf.py | 2 +- scripts/check_generic_visit.py | 2 +- tests/fixtures/noqa/noqa.py | 6 +- tests/test_checker/test_noqa.py | 1 + .../test_attributes/test_open_encoding.py | 88 +++++++++++++++++++ .../presets/types/tree.py | 1 + .../violations/best_practices.py | 30 +++++++ .../visitors/ast/attributes.py | 38 +++++++- 9 files changed, 169 insertions(+), 7 deletions(-) create mode 100644 tests/test_visitors/test_ast/test_attributes/test_open_encoding.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a9bec257a..f42aceddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,13 @@ Semantic versioning in our case means: But, in the future we might change the configuration names / logic, change the client facing API, change code conventions signigicantly, etc. - +## 0.16.0 + +### Features + +- Forbid using open without specifying encoding. + + ## 0.15.3 WIP ### Bugfixes diff --git a/docs/conf.py b/docs/conf.py index 04b8a1fe7..cc590042a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- def _get_project_meta(): - with open('../pyproject.toml') as pyproject: + with open('../pyproject.toml', encoding=None) as pyproject: file_contents = pyproject.read() return tomlkit.parse(file_contents)['tool']['poetry'] diff --git a/scripts/check_generic_visit.py b/scripts/check_generic_visit.py index 8c25a014b..d977aac61 100644 --- a/scripts/check_generic_visit.py +++ b/scripts/check_generic_visit.py @@ -33,7 +33,7 @@ my_print('"self.generic_visit(node)" should be last statement here:') for fn, line in matches: - with open(fn, 'r') as fp: + with open(fn, 'r', encoding='utf-8-sig') as fp: source = fp.read() lines = source.splitlines() highlighted = highlight( diff --git a/tests/fixtures/noqa/noqa.py b/tests/fixtures/noqa/noqa.py index f2c8ee77b..7861c9baa 100644 --- a/tests/fixtures/noqa/noqa.py +++ b/tests/fixtures/noqa/noqa.py @@ -381,7 +381,7 @@ def function_with_wrong_yield(): for literal in bad_concatenation: # noqa: WPS327, WPS328 continue -with open(bad_concatenation): # noqa: WPS328 +with open(bad_concatenation): # noqa: WPS328, WPS467 pass # noqa: WPS420 @@ -404,7 +404,7 @@ def some_other_function(): string_concat = 'a' + 'b' # noqa: WPS336 my_print(one == 'a' or one == 'b') # noqa: WPS514 -file_obj = open('filaname.py') # noqa: WPS515 +file_obj = open('filaname.py') # noqa: WPS515, WPS467 my_print(type(file_obj) == int) # noqa: WPS516 my_print(*[], **{'@': 1}) # noqa: WPS517, WPS445 @@ -549,7 +549,7 @@ def bad_default_values( for nodes[0] in (1, 2, 3): # noqa: WPS405 anti_wps428 = 1 -with open('some') as MyBadException.custom: # noqa: WPS406 +with open('some') as MyBadException.custom: # noqa: WPS406, WPS467 anti_wps428 = 1 diff --git a/tests/test_checker/test_noqa.py b/tests/test_checker/test_noqa.py index 9ef12b3c8..000ae3d4c 100644 --- a/tests/test_checker/test_noqa.py +++ b/tests/test_checker/test_noqa.py @@ -246,6 +246,7 @@ 'WPS464': 0, # logically unacceptable. 'WPS465': 1, 'WPS466': 0, # defined in version specific table. + 'WPS467': 3, 'WPS500': 1, 'WPS501': 1, diff --git a/tests/test_visitors/test_ast/test_attributes/test_open_encoding.py b/tests/test_visitors/test_ast/test_attributes/test_open_encoding.py new file mode 100644 index 000000000..d0d043c7e --- /dev/null +++ b/tests/test_visitors/test_ast/test_attributes/test_open_encoding.py @@ -0,0 +1,88 @@ +import pytest + +from wemake_python_styleguide.violations.best_practices import ( + UnspecifiedEncodingViolation, +) +from wemake_python_styleguide.visitors.ast.attributes import EncodingVisitor + +unspecified_encoding_with = """ +with open('filename.txt') as fd: + fd.read() +""" + +specified_encoding_with = """ +with open('filename.txt', encoding='ascii') as fd: + fd.read() +""" + +unspecified_encoding_assign = """ +file = open('filename.txt', 'r') +""" + +specified_encoding_assign = """ +file = open('filename.txt', 'r', encoding='utf8') +""" + +specified_encoding_with_none = """ +with open('filename.txt', encoding=None) as fd: + fd.read() +""" + +unspecified_encoding_with_multiple = """ +with open('filename.txt', 'w', -1) as fd: + fd.read() +""" + +specified_encoding_with_multiple = """ +with open('filename.txt', 'w', -1, None) as fd: + fd.read() +""" + +unspecified_encoding_assign_multiple = """ +file = open('filename.txt', 'w', -1) +""" + +specified_encoding_assign_multiple = """ +file = open('filename.txt', 'w', -1, None) +""" + + +@pytest.mark.parametrize('code', [ + unspecified_encoding_with, + unspecified_encoding_assign, + unspecified_encoding_with_multiple, + unspecified_encoding_assign_multiple, +]) +def test_unspecified_encoding( + assert_errors, + code, + default_options, + parse_ast_tree, +): + """Testing open encoding is unspecified.""" + tree = parse_ast_tree(code) + visitor = EncodingVisitor(default_options, tree=tree) + visitor.run() + + assert_errors(visitor, [UnspecifiedEncodingViolation]) + + +@pytest.mark.parametrize('code', [ + specified_encoding_with, + specified_encoding_assign, + specified_encoding_with_none, + specified_encoding_with_multiple, + specified_encoding_assign_multiple, +]) +def test_specified_encoding( + assert_errors, + code, + default_options, + parse_ast_tree, +): + """Testing open encoding is specified.""" + tree = parse_ast_tree(code) + visitor = EncodingVisitor(default_options, tree=tree) + visitor.run() + + assert_errors(visitor, []) diff --git a/wemake_python_styleguide/presets/types/tree.py b/wemake_python_styleguide/presets/types/tree.py index 90262ea94..e4b8f205f 100644 --- a/wemake_python_styleguide/presets/types/tree.py +++ b/wemake_python_styleguide/presets/types/tree.py @@ -46,6 +46,7 @@ loops.SyncForLoopVisitor, attributes.WrongAttributeVisitor, + attributes.EncodingVisitor, annotations.WrongAnnotationVisitor, functions.WrongFunctionCallVisitor, diff --git a/wemake_python_styleguide/violations/best_practices.py b/wemake_python_styleguide/violations/best_practices.py index 564fb3703..a6fb16ea7 100644 --- a/wemake_python_styleguide/violations/best_practices.py +++ b/wemake_python_styleguide/violations/best_practices.py @@ -83,6 +83,7 @@ EmptyCommentViolation BitwiseAndBooleanMixupViolation NewStyledDecoratorViolation + UnspecifiedEncodingViolation Best practices -------------- @@ -154,6 +155,7 @@ .. autoclass:: EmptyCommentViolation .. autoclass:: BitwiseAndBooleanMixupViolation .. autoclass:: NewStyledDecoratorViolation +.. autoclass:: UnspecifiedEncodingViolation """ @@ -2572,3 +2574,31 @@ def my_function(): ... error_template = 'Found new-styled decorator' code = 466 + + +@final +class UnspecifiedEncodingViolation(ASTViolation): + """ + Forbid using open without specifying encoding. + + Reasoning: + Inspired by https://www.python.org/dev/peps/pep-0597/ + Not specifying the encoding could be considered a bug. + Developers using macOS or Linux may forget that the + default encoding is not always UTF-8. + + Solution: + Specify the encoding in open. + + Example: + # Correct: + open('filename.txt', encoding='utf8') + + # Wrong: + with open('filename.txt') + + .. versionadded:: 0.16.0 + """ + + error_template = 'Found unespecified encoding' + code = 467 diff --git a/wemake_python_styleguide/visitors/ast/attributes.py b/wemake_python_styleguide/visitors/ast/attributes.py index b3c74fd85..47d03d75c 100644 --- a/wemake_python_styleguide/visitors/ast/attributes.py +++ b/wemake_python_styleguide/visitors/ast/attributes.py @@ -1,11 +1,12 @@ import ast -from typing import ClassVar, FrozenSet +from typing import ClassVar, FrozenSet, List from typing_extensions import final from wemake_python_styleguide.logic.naming import access from wemake_python_styleguide.violations.best_practices import ( ProtectedAttributeViolation, + UnspecifiedEncodingViolation, ) from wemake_python_styleguide.violations.oop import ( DirectMagicAttributeAccessViolation, @@ -73,3 +74,38 @@ def _check_magic_attribute(self, node: ast.Attribute) -> None: self._ensure_attribute_type( node, DirectMagicAttributeAccessViolation, ) + + +@final +class EncodingVisitor(BaseNodeVisitor): + """Check if open function has the encoding parameter.""" + + def __init__(self, *args, **kwargs) -> None: + """Creates the booleans indicating encoding was found.""" + super().__init__(*args, **kwargs) + self._encoding = False + + def visit_Call(self, node: ast.Call): + """Visit calls and finds if it is an open function.""" + if isinstance(node.func, ast.Name) and node.func.id == 'open': + if node.keywords: + for keyword in node.keywords: + self._check_keywords(keyword) + if node.args: + self._check_args(node.args) + + if self._encoding is False: + self.add_violation(UnspecifiedEncodingViolation(node)) + + self.generic_visit(node) + + def _check_keywords(self, node: ast.keyword): + """Check if there is an encoding parameter.""" + if node.arg == 'encoding': + self._encoding = True + + def _check_args(self, node: List[ast.expr]): + """Check if there is an encoding parameter.""" + if len(node) > 3: + if isinstance(node[3], ast.Constant): + self._encoding = True From 7203962def106917d5cbbe4a40faaa117eae1cf3 Mon Sep 17 00:00:00 2001 From: Gibran Date: Tue, 29 Jun 2021 14:49:33 -0500 Subject: [PATCH 2/5] Refactored Encoding Visitor --- .../visitors/ast/attributes.py | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/wemake_python_styleguide/visitors/ast/attributes.py b/wemake_python_styleguide/visitors/ast/attributes.py index 47d03d75c..653fae926 100644 --- a/wemake_python_styleguide/visitors/ast/attributes.py +++ b/wemake_python_styleguide/visitors/ast/attributes.py @@ -80,32 +80,26 @@ def _check_magic_attribute(self, node: ast.Attribute) -> None: class EncodingVisitor(BaseNodeVisitor): """Check if open function has the encoding parameter.""" - def __init__(self, *args, **kwargs) -> None: - """Creates the booleans indicating encoding was found.""" - super().__init__(*args, **kwargs) - self._encoding = False - - def visit_Call(self, node: ast.Call): + def visit_Call(self, node: ast.Call) -> None: """Visit calls and finds if it is an open function.""" if isinstance(node.func, ast.Name) and node.func.id == 'open': + is_positional = False + is_keyword = False + if node.keywords: - for keyword in node.keywords: - self._check_keywords(keyword) + is_keyword = self._check_keywords(node.keywords) if node.args: - self._check_args(node.args) + is_positional = self._check_args(node.args) - if self._encoding is False: + if not is_positional and not is_keyword: self.add_violation(UnspecifiedEncodingViolation(node)) self.generic_visit(node) - def _check_keywords(self, node: ast.keyword): + def _check_keywords(self, keywords: List[ast.keyword]) -> bool: """Check if there is an encoding parameter.""" - if node.arg == 'encoding': - self._encoding = True + return bool(filter(lambda keyword: keyword.arg == 'encoding', keywords)) - def _check_args(self, node: List[ast.expr]): + def _check_args(self, positionals: List[ast.expr]) -> bool: """Check if there is an encoding parameter.""" - if len(node) > 3: - if isinstance(node[3], ast.Constant): - self._encoding = True + return len(positionals) > 3 and isinstance(positionals[3], ast.Constant) From 9bc9033fa1aef406be1ebcb4faa051c688cff36f Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Tue, 29 Jun 2021 23:33:24 +0300 Subject: [PATCH 3/5] Simplified code a little --- .../visitors/ast/attributes.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/wemake_python_styleguide/visitors/ast/attributes.py b/wemake_python_styleguide/visitors/ast/attributes.py index 653fae926..30f9b2687 100644 --- a/wemake_python_styleguide/visitors/ast/attributes.py +++ b/wemake_python_styleguide/visitors/ast/attributes.py @@ -4,6 +4,7 @@ from typing_extensions import final from wemake_python_styleguide.logic.naming import access +from wemake_python_styleguide.logic.tree.functions import given_function_called from wemake_python_styleguide.violations.best_practices import ( ProtectedAttributeViolation, UnspecifiedEncodingViolation, @@ -82,14 +83,9 @@ class EncodingVisitor(BaseNodeVisitor): def visit_Call(self, node: ast.Call) -> None: """Visit calls and finds if it is an open function.""" - if isinstance(node.func, ast.Name) and node.func.id == 'open': - is_positional = False - is_keyword = False - - if node.keywords: - is_keyword = self._check_keywords(node.keywords) - if node.args: - is_positional = self._check_args(node.args) + if given_function_called(node, {'open'}): + is_keyword = self._check_keywords(node.keywords) + is_positional = self._check_args(node.args) if not is_positional and not is_keyword: self.add_violation(UnspecifiedEncodingViolation(node)) @@ -98,7 +94,7 @@ def visit_Call(self, node: ast.Call) -> None: def _check_keywords(self, keywords: List[ast.keyword]) -> bool: """Check if there is an encoding parameter.""" - return bool(filter(lambda keyword: keyword.arg == 'encoding', keywords)) + return any(keyword.arg == 'encoding' for keyword in keywords) def _check_args(self, positionals: List[ast.expr]) -> bool: """Check if there is an encoding parameter.""" From c5b458ceaf6f057a76e16b9651d8dc6b5d7442a7 Mon Sep 17 00:00:00 2001 From: Gibran Date: Tue, 29 Jun 2021 18:44:15 -0500 Subject: [PATCH 4/5] Changed encoders to eliminate warnings --- docs/conf.py | 2 +- scripts/check_generic_visit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cc590042a..cb86e64cc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- def _get_project_meta(): - with open('../pyproject.toml', encoding=None) as pyproject: + with open('../pyproject.toml', encoding='ascii') as pyproject: file_contents = pyproject.read() return tomlkit.parse(file_contents)['tool']['poetry'] diff --git a/scripts/check_generic_visit.py b/scripts/check_generic_visit.py index d977aac61..d5aa54599 100644 --- a/scripts/check_generic_visit.py +++ b/scripts/check_generic_visit.py @@ -33,7 +33,7 @@ my_print('"self.generic_visit(node)" should be last statement here:') for fn, line in matches: - with open(fn, 'r', encoding='utf-8-sig') as fp: + with open(fn, 'r', encoding='utf-8') as fp: source = fp.read() lines = source.splitlines() highlighted = highlight( From 30e0db9b5835722a413c1ff790e96e120c758d92 Mon Sep 17 00:00:00 2001 From: Gibran Date: Tue, 29 Jun 2021 18:46:02 -0500 Subject: [PATCH 5/5] Modified UnsepcifiedEncodingViolation error template --- wemake_python_styleguide/violations/best_practices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wemake_python_styleguide/violations/best_practices.py b/wemake_python_styleguide/violations/best_practices.py index a6fb16ea7..93ce739b4 100644 --- a/wemake_python_styleguide/violations/best_practices.py +++ b/wemake_python_styleguide/violations/best_practices.py @@ -2600,5 +2600,5 @@ class UnspecifiedEncodingViolation(ASTViolation): .. versionadded:: 0.16.0 """ - error_template = 'Found unespecified encoding' + error_template = 'Found `open()` call without specified encoding' code = 467