diff --git a/src/conferatur/api/cli.py b/src/conferatur/api/cli.py index 18af75e0..fc09b1c6 100644 --- a/src/conferatur/api/cli.py +++ b/src/conferatur/api/cli.py @@ -25,14 +25,15 @@ def argparser(parser): parser.add_argument('--entrypoint', default='/api', help='the jsonrpc api address') parser.add_argument('--with-explorer', action='store_true', - help='also create the explorer to test api calls with, this is a rudimentary feature currently ' + help='also create the explorer to test api calls with, ' + 'this is a rudimentary feature currently ' 'only meant for testing and debugging') parser.add_argument('--list-methods', action='store_true', help='list the available jsonrpc methods') return parser -def create_app(entrypoint: str=None, with_explorer: bool=None): +def create_app(entrypoint: str = None, with_explorer: bool = None): """ Create the Flask app @@ -90,4 +91,3 @@ def main(parser, args): else: app = create_app(args.entrypoint, args.with_explorer) app.run(host=args.host, port=args.port, debug=args.debug) - diff --git a/src/conferatur/api/jsonrpc.py b/src/conferatur/api/jsonrpc.py index e3d23fd4..d369274c 100644 --- a/src/conferatur/api/jsonrpc.py +++ b/src/conferatur/api/jsonrpc.py @@ -147,4 +147,3 @@ def _help(): for name, func in methods.items.items()} return methods - diff --git a/src/conferatur/csv.py b/src/conferatur/csv.py index aa19f274..77407586 100644 --- a/src/conferatur/csv.py +++ b/src/conferatur/csv.py @@ -276,7 +276,7 @@ def next_field(): yield yield_line() -def reader(file: typing.io.TextIO, dialect: typing.Union[None, str, Dialect]=None) -> Reader: +def reader(file: typing.io.TextIO, dialect: typing.Union[None, str, Dialect] = None) -> Reader: if dialect is None: dialect = DefaultDialect elif type(dialect) is str: @@ -285,6 +285,3 @@ def reader(file: typing.io.TextIO, dialect: typing.Union[None, str, Dialect]=Non dialect = known_dialects[dialect] return Reader(file, dialect) - - - diff --git a/src/conferatur/docblock.py b/src/conferatur/docblock.py index b28f5700..f9d840dc 100644 --- a/src/conferatur/docblock.py +++ b/src/conferatur/docblock.py @@ -11,7 +11,8 @@ logger = logging.getLogger(__name__) Docblock = namedtuple('Docblock', ['docs', 'params', 'result', 'result_type']) -Param = namedtuple('Param', ['name', 'type', 'type_doc', 'is_required', 'description', 'examples']) +Param = namedtuple('Param', ['name', 'type', 'type_doc', 'is_required', + 'description', 'examples']) DocblockParam = namedtuple('DocblockParam', ['name', 'type', 'value']) @@ -19,7 +20,8 @@ def format_docs(docs): return textwrap.dedent(docs).strip() -def doc_param_parser(docstring, key, no_name=None, allow_multiple=None, replace_strat=None): +def doc_param_parser(docstring, key, no_name=None, allow_multiple=None, + replace_strat=None): results = [] if no_name or allow_multiple else {} if replace_strat is None: @@ -44,7 +46,8 @@ def _(match): param = DocblockParam(**param) if allow_multiple: # check if it already exists, if not create a new object - idx = [idx for idx, val in enumerate(results) if match[2] not in val] + idx = [idx for idx, val in enumerate(results) + if match[2] not in val] if not len(idx): idx = len(results) results.append({}) @@ -61,7 +64,9 @@ def _(match): else: regex = r'^[ \t]*:%s[ \t]+(?:([^:]+)[ \t]+)?([a-z_]+):(?:[ \t]+(.*))?$' - docs = re.sub(regex % (re.escape(key),), _, docstring, flags=re.MULTILINE).strip() + docs = re.sub( + regex % (re.escape(key),), _, docstring, flags=re.MULTILINE + ).strip() return docs, results @@ -95,7 +100,8 @@ def decode_examples(match, param): param['value'] = decode_literal(param['value']) return '' - docs, examples = doc_param_parser(docs, 'example', allow_multiple=True, replace_strat=decode_examples) + docs, examples = doc_param_parser(docs, 'example', allow_multiple=True, + replace_strat=decode_examples) params = [] for idx, name in enumerate(args): @@ -106,7 +112,8 @@ def decode_examples(match, param): description = doc_params[name].value param = Param(name, - argspec.annotations[name] if name in argspec.annotations else None, + argspec.annotations[name] if name in argspec.annotations + else None, type_, idx < defaults_idx, description, @@ -132,5 +139,5 @@ def apply_template(self): def rst_to_html(text): writer = HTML5Writer() settings = {'output_encoding': 'unicode', 'table_style': 'table'} - return publish_string(text, writer=writer, writer_name='html5', settings_overrides=settings) - + return publish_string(text, writer=writer, writer_name='html5', + settings_overrides=settings) diff --git a/src/conferatur/normalization/cli.py b/src/conferatur/normalization/cli.py index 42bed657..088af3df 100644 --- a/src/conferatur/normalization/cli.py +++ b/src/conferatur/normalization/cli.py @@ -167,4 +167,3 @@ def main(parser, args): output_file = output_files[0] output_file.write(text) output_file.close() - diff --git a/src/conferatur/normalization/core.py b/src/conferatur/normalization/core.py index b93527ab..e4249ccf 100644 --- a/src/conferatur/normalization/core.py +++ b/src/conferatur/normalization/core.py @@ -26,7 +26,9 @@ def normalize(text: str) -> str: class LocalizedFile: """ - Reads and applies normalization rules from a locale-based file, it will automatically determine the "best fit" for a given locale, if one is available. + Reads and applies normalization rules from a locale-based file, it will + automatically determine the "best fit" for a given locale, if one is + available. :param str|class normalizer: Normalizer name (or class) :param str locale: Which locale to search for @@ -44,7 +46,8 @@ class LocalizedFile: def __init__(self, normalizer, locale: str, path: str, encoding=None): path = os.path.realpath(path) if not os.path.isdir(path): - raise NotADirectoryError("Expected '%s' to be a directory" % (str(path),)) + raise NotADirectoryError("Expected '%s' to be a directory" % + (str(path),)) files = {standardize_tag(file): file for file in os.listdir(path) @@ -53,7 +56,9 @@ def __init__(self, normalizer, locale: str, path: str, encoding=None): locale = standardize_tag(locale) match = best_match(locale, files.keys())[0] if match == 'und': - raise FileNotFoundError("Could not find a locale file for locale '%s' in '%s'" % (locale, str(path))) + raise FileNotFoundError( + "Could not find a locale file for locale '%s' in '%s'" % + (locale, str(path))) file = os.path.join(path, files[match]) @@ -80,7 +85,7 @@ class Replace: :example return: "Nudge wink!" """ - def __init__(self, search: str, replace: str=''): + def __init__(self, search: str, replace: str = ''): self._search = search self._replace = replace @@ -106,7 +111,7 @@ class ReplaceWords: def __init__(self, search: str, replace: str): search = search.strip() replace = replace.strip() - + args = tuple(map(re.escape, [ search[0].upper(), search[0].lower(), @@ -144,7 +149,8 @@ class File: def __init__(self, normalizer, file, encoding=None): try: - cls = normalizer if inspect.isclass(normalizer) else normalization.name_to_normalizer(normalizer) + cls = normalizer if inspect.isclass(normalizer) else \ + normalization.name_to_normalizer(normalizer) except ValueError: raise ValueError("Unknown normalizer %s" % (repr(normalizer))) @@ -173,7 +179,8 @@ class RegexReplace: Case-insensitivity is supported by adding inline modifiers. - You might want to use capturing groups to preserve the case. When replacing a character not captured, the information about its case is lost... + You might want to use capturing groups to preserve the case. When replacing + a character not captured, the information about its case is lost... Eg. would replace "HAHA! Hahaha!" to "HeHe! Hehehe!": @@ -184,7 +191,8 @@ class RegexReplace: +------------------+-------------+ - No regex flags are set by default, you can set them yourself though in the regex, and combine them at will, eg. multiline, dotall and ignorecase. + No regex flags are set by default, you can set them yourself though in the + regex, and combine them at will, eg. multiline, dotall and ignorecase. Eg. would replace "Newline" to "newline": @@ -200,7 +208,7 @@ class RegexReplace: :example return: "HeHe! Hehehe!" """ - def __init__(self, search: str, replace: str=None): + def __init__(self, search: str, replace: str = None): self._pattern = re.compile(search) self._substitution = replace if replace is not None else '' @@ -223,7 +231,8 @@ def __init__(self): class AlphaNumericUnicode(RegexReplace): """ - Simple alphanumeric filter, takes into account all unicode alphanumeric characters + Simple alphanumeric filter, takes into account all unicode alphanumeric + characters. :example text: "Das, öder die Flipper-Wåld Gespütt!" :example return: "DasöderdieFlipperWåldGespütt" @@ -249,7 +258,8 @@ def normalize(self, text: str) -> str: class Unidecode: """ - Unidecode characters to ASCII form, see `Python's Unidecode package `_ for more info. + Unidecode characters to ASCII form, see `Python's Unidecode package + `_ for more info. :example text: "𝖂𝖊𝖓𝖓 𝖎𝖘𝖙 𝖉𝖆𝖘 𝕹𝖚𝖓𝖘𝖙ü𝖈𝖐 𝖌𝖎𝖙 𝖚𝖓𝖉 𝕾𝖑𝖔𝖙𝖊𝖗𝖒𝖊𝖞𝖊𝖗?" :example return: "Wenn ist das Nunstuck git und Slotermeyer?" @@ -262,15 +272,21 @@ def normalize(self, text: str) -> str: class Config: r""" - Use config notation to define normalization rules. This notation is a list of normalizers, one per line, with optional arguments (separated by a space). + Use config notation to define normalization rules. This notation is a + list of normalizers, one per line, with optional arguments (separated by a + space). - The normalizers can be any of the core normalizers, or you can refer to your own normalizer class (like you would use in a python import, eg. `my.own.package.MyNormalizerClass`). + The normalizers can be any of the core normalizers, or you can refer to your + own normalizer class (like you would use in a python import, eg. + `my.own.package.MyNormalizerClass`). Additional rules: - Normalizer names are case-insensitive. - Arguments MAY be wrapped in double quotes. - - If an argument contains a space, newline or double quote, it MUST be wrapped in double quotes. - - A double quote itself is represented in this quoted argument as two double quotes: `""`. + - If an argument contains a space, newline or double quote, it MUST be + wrapped in double quotes. + - A double quote itself is represented in this quoted argument as two + double quotes: `""`. The normalization rules are applied top-to-bottom and follow this format: @@ -278,7 +294,7 @@ class Config: Normalizer1 arg1 "arg 2" # This is a comment - + Normalizer2 # (Normalizer2 has no arguments) Normalizer3 "This is argument 1 @@ -289,7 +305,13 @@ class Config: :param str config: configuration text :example text: "He bravely turned his tail and fled" - :example config: '# using a simple config file\nLowercase \n\n # it even supports comments\n# If there is a space in the argument, make sure you quote it though!\n regexreplace "y t" "Y T"\n \n # extraneous whitespaces are ignored \n replace e a\n' + :example config: '''# using a simple config file\nLowercase \n + # it even supports comments + # If there is a space in the argument, make sure you quote it though! + regexreplace "y t" "Y T" + \n\n + # extraneous whitespaces are ignored + replace e a\n''' :example return: "ha bravalY Turnad his tail and flad" """ @@ -313,7 +335,8 @@ def normalize(self, text: str) -> str: class ConfigFile(Config): """ - Load config from a file, see :py:class:`Config` for information about config notation + Load config from a file, see :py:class:`Config` for information about config + notation :param typing.io.TextIO file: The file :param str encoding: The file encoding @@ -330,4 +353,3 @@ def __init__(self, file, encoding=None): with open(file, encoding=encoding) as f: self._parse_config(f) - diff --git a/src/conferatur/normalization/logger.py b/src/conferatur/normalization/logger.py index cfcf7c8f..1ad7fe65 100644 --- a/src/conferatur/normalization/logger.py +++ b/src/conferatur/normalization/logger.py @@ -118,4 +118,3 @@ def _(cls, text): normalize_stack.pop() return result return _ - diff --git a/tests/conferatur/normalization/test_core.py b/tests/conferatur/normalization/test_core.py index 2d236d20..a9d2883f 100644 --- a/tests/conferatur/normalization/test_core.py +++ b/tests/conferatur/normalization/test_core.py @@ -9,8 +9,8 @@ def test_logs(caplog): # using a simple config file lowercase lowercase - # Let's replace double quotes with single quotes (note wrapping in double quotes, - # to allow the use of double quotes in an argument. + # Let's replace double quotes with single quotes (note wrapping in double + # quotes, to allow the use of double quotes in an argument. RegexReplace "[""]" ' # A space in the argument: wrap in double quotes as well Replace 'ni' "'ecky ecky ecky'" @@ -19,11 +19,13 @@ def test_logs(caplog): normalized = normalizer.normalize('No! Not the Knights Who Say "Ni"!') assert normalized == "no! not the knights who say 'ecky ecky ecky'!" - assert len(caplog.records) == 0, "logs shouldn't be propagated unless we register our own handlers" + assert len(caplog.records) == 0, \ + "logs shouldn't be propagated unless we register our own handlers" def test_config(): - # Lets replace spaces with a newline (without using regex), demonstrating multiline arguments + # Lets replace spaces with a newline (without using regex), + # demonstrating multiline arguments # also note that the normalizer name is case-insensitive config = 'replace " " "\n"' @@ -49,14 +51,17 @@ def test_composite(): comp = NormalizationComposite() comp.add(normalizer) comp.add(Replace(' ni', ' Ekke Ekke Ekke Ekke Ptang Zoo Boing')) - assert comp.normalize(text) == 'knights who say: Ekke Ekke Ekke Ekke Ptang Zoo Boing!' + assert comp.normalize(text) == \ + 'knights who say: Ekke Ekke Ekke Ekke Ptang Zoo Boing!' comp.add(Lowercase()) - assert comp.normalize(text) == 'knights who say: ekke ekke ekke ekke ptang zoo boing!' + assert comp.normalize(text) == \ + 'knights who say: ekke ekke ekke ekke ptang zoo boing!' normalizer.add(Replace(' ni', ' nope')) assert comp.normalize(text) == 'knights who say: nope!' - assert comp.normalize('Ich fälle Bäume und hüpf und spring.') == 'ich falle baume und hupf und spring.' + assert comp.normalize('Ich fälle Bäume und hüpf und spring.') == \ + 'ich falle baume und hupf und spring.' def test_lowercase(): @@ -64,24 +69,32 @@ def test_lowercase(): def test_unicode(): - assert Unidecode().normalize('Eine große europäische Schwalbe') == 'Eine grosse europaische Schwalbe' + assert Unidecode().normalize('Eine große europäische Schwalbe') == \ + 'Eine grosse europaische Schwalbe' def test_alphanumericunicode(): - assert AlphaNumericUnicode().normalize("Das, öder die Flipper-Wåld Gespütt!") == 'DasöderdieFlipperWåldGespütt' + assert AlphaNumericUnicode().normalize( + "Das, öder die Flipper-Wåld Gespütt!" + ) == 'DasöderdieFlipperWåldGespütt' def test_alphanumeric(): - assert AlphaNumeric().normalize("She turned me into a newt.") == 'Sheturnedmeintoanewt' - assert AlphaNumeric().normalize("Das, öder die Flipper-Wåld Gespütt!") == 'DasderdieFlipperWldGesptt' + assert AlphaNumeric().normalize("She turned me into a newt.") == \ + 'Sheturnedmeintoanewt' + assert AlphaNumeric().normalize("Das, öder die Flipper-Wåld Gespütt!") == \ + 'DasderdieFlipperWldGesptt' def test_regexreplace(): normalizer = RegexReplace('(scratch)', r"\1 (his arm's off)") - assert normalizer.normalize('Tis but a scratch.') == "Tis but a scratch (his arm's off)." + assert normalizer.normalize('Tis but a scratch.') == \ + "Tis but a scratch (his arm's off)." assert RegexReplace('ha', 'he').normalize('HA! Hahaha!') == 'HA! Hahehe!' - assert RegexReplace('(?i)(h)a', r'\1e').normalize('HAHA! Hahaha!') == 'HeHe! Hehehe!' - assert RegexReplace('(?msi)new.line', 'newline').normalize("New\nline") == 'newline' + assert RegexReplace('(?i)(h)a', r'\1e').normalize('HAHA! Hahaha!') == \ + 'HeHe! Hehehe!' + assert RegexReplace('(?msi)new.line', 'newline').normalize("New\nline") == \ + 'newline' def test_file(): @@ -104,10 +117,12 @@ def test_replacewords(): def test_replace(): normalizer = Replace('scratch', 'flesh wound') - assert normalizer.normalize('Tis but a scratch.') == 'Tis but a flesh wound.' + assert normalizer.normalize('Tis but a scratch.') == \ + 'Tis but a flesh wound.' def test_localizedfile(): path = './resources/test/normalizers/configfile' normalizer = LocalizedFile('Config', 'en_UK', path) - assert normalizer.normalize("𝔊𝔯𝔞𝔫𝔡𝔢 𝔖𝔞𝔰𝔰𝔬 𝔡'ℑ𝔱𝔞𝔩𝔦𝔞") == "gran sasso d'italia" \ No newline at end of file + assert normalizer.normalize("𝔊𝔯𝔞𝔫𝔡𝔢 𝔖𝔞𝔰𝔰𝔬 𝔡'ℑ𝔱𝔞𝔩𝔦𝔞") == \ + "gran sasso d'italia" diff --git a/tests/conferatur/test_csv.py b/tests/conferatur/test_csv.py index 3d19fcb1..38e7c2bd 100644 --- a/tests/conferatur/test_csv.py +++ b/tests/conferatur/test_csv.py @@ -15,9 +15,10 @@ def _reader(text): assert _reader('') == [] - expected = [['Some line', 'some other'], ['dsfgdsg'], ['stay', 'togther '], ['fsdss']] + expected = [['Some line', 'some other'], ['dsfgdsg'], ['stay', 'togther '], + ['fsdss']] assert _reader(''' - Some line, some other \t + Some line, some other \t \t dsfgdsg \n \t \r @@ -65,9 +66,11 @@ def _reader(text): assert _reader('replace " " "\n"') == [['replace', ' ', '\n']] - expected = [['Lowercase'], ['regexreplace', 'y t', 'Y T'], ['Replace', 'e', 'a']] + expected = [['Lowercase'], + ['regexreplace', 'y t', 'Y T'], + ['Replace', 'e', 'a']] gotten = _reader('''# using a simple config file -Lowercase +Lowercase \n # it even supports comments # If there is a space in the argument, make sure you quote it though! @@ -84,7 +87,8 @@ def _reader(text): expected = [ ['Normalizer1', 'arg1', 'arg 2'], ['Normalizer2'], - ['Normalizer3', 'This is argument 1\nSpanning multiple lines\n', 'argument 2'], + ['Normalizer3', 'This is argument 1\nSpanning multiple lines\n', + 'argument 2'], ['Normalizer4', 'argument with double quote (")'] ] @@ -104,5 +108,6 @@ def _reader(text): assert _reader("lower case \n") == [['lower', 'case']] assert _reader('test "stuff "\t') == [['test', 'stuff ']] assert _reader('test "stuff "\n') == [['test', 'stuff ']] - assert _reader('test "stuff\n\t"\n\t \t YEs \t \n') == [['test', 'stuff\n\t'], ['YEs']] + assert _reader('test "stuff\n\t"\n\t \t YEs \t \n') == \ + [['test', 'stuff\n\t'], ['YEs']] assert _reader("\n\n\n\nline5")[0].lineno == 5