From 3b9477a0cae197d8a6cb48397a73a1f800bbaf45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melissa=20Weber=20Mendon=C3=A7a?= Date: Wed, 13 Dec 2023 07:46:06 -0300 Subject: [PATCH 1/3] Fixes incorrect parsing of Warning directive The Warning directive does not admit a title, see https://docutils.sourceforge.io/docs/ref/doctree.html#warning Also improves tests for directive parsing. --- papyri/gen.py | 7 +- papyri/tests/test_ascii_expected.py | 2 + papyri/tests/test_parse.py | 81 +++++++++++++++++- papyri/ts.py | 123 ++++++++++++++++++---------- 4 files changed, 164 insertions(+), 49 deletions(-) diff --git a/papyri/gen.py b/papyri/gen.py index fcaaa668..cbbc95ba 100644 --- a/papyri/gen.py +++ b/papyri/gen.py @@ -1385,7 +1385,8 @@ def collect_narrative_docs(self): title_map = {} blbs = {} with self.progress() as p2: - task = p2.add_task("Parsing narative", total=len(files)) + task = p2.add_task("Parsing narrative", total=len(files)) + for p in files: p2.update(task, description=compress_user(str(p)).ljust(7)) p2.advance(task) @@ -1983,9 +1984,9 @@ def extract_docstring( if item_docstring is None and not isinstance(target_item, ModuleType): return None, [], api_object - elif item_docstring is None and isinstance(target_item, ModuleType): item_docstring = """This module has no documentation""" + try: sections = ts.parse(item_docstring.encode(), qa) except (AssertionError, NotImplementedError) as e: @@ -2063,6 +2064,7 @@ def collect_api_docs(self, root: str, limit_to: List[str]) -> None: ) collected = {k: v for k, v in collected.items() if k not in excluded} + if limit_to: non_existinsing = [k for k in limit_to if k not in collected] if non_existinsing: @@ -2076,6 +2078,7 @@ def collect_api_docs(self, root: str, limit_to: List[str]) -> None: self.log.info("DEV: regenerating docs only for") for k, v in collected.items(): self.log.info(f" {k}:{v}") + aliases: Dict[FullQual, Cannonical] aliases, not_found = collector.compute_aliases() rev_aliases: Dict[Cannonical, FullQual] = {v: k for k, v in aliases.items()} diff --git a/papyri/tests/test_ascii_expected.py b/papyri/tests/test_ascii_expected.py index 8c2fbcd4..95a81c50 100644 --- a/papyri/tests/test_ascii_expected.py +++ b/papyri/tests/test_ascii_expected.py @@ -12,6 +12,8 @@ def _get_result_for_name(name): + # WARNING: This test only works if the papyri and numpy docs are generated and + # ingested first gstore = GraphStore(ingest_dir, {}) key = next(iter(gstore.glob((None, None, "module", name)))) diff --git a/papyri/tests/test_parse.py b/papyri/tests/test_parse.py index ba24f5ce..c2d99511 100644 --- a/papyri/tests/test_parse.py +++ b/papyri/tests/test_parse.py @@ -1,9 +1,16 @@ from textwrap import dedent +from pathlib import Path import pytest from papyri import errors -from papyri.ts import parse +from papyri.ts import parse, Node, TSVisitor +from tree_sitter import Language, Parser + +parser = Parser() +pth = str(Path(__file__).parent.parent / "rst.so") +RST = Language(pth, "rst") +parser.set_language(RST) # @pytest.mark.xfail(strict=True) @@ -17,7 +24,6 @@ def test_parse_space_in_directive_section(): should raise/warn in papyri. It may depends on the tree-sitter rst version. - """ ) pytest.raises( @@ -28,6 +34,77 @@ def test_parse_space_in_directive_section(): ) +def test_parse_directive_body(): + data1 = dedent( + """ + + .. directive:: Directive title + + This directive declares a title and content in a block separated from + the definition by an empty new line. + + """ + ) + data2 = dedent( + """ + + .. directive:: Directive title + This directive declares a title and content not separated by an empty + newline. + + """ + ) + + text1 = data1.strip("\n").encode() + text2 = data2.strip("\n").encode() + + tree1 = parser.parse(text1) + tree2 = parser.parse(text2) + + directive1 = Node(tree1.root_node).without_whitespace() + directive2 = Node(tree2.root_node).without_whitespace() + + tsv1 = TSVisitor(text1, directive1, "test_parse_directive_body") + tsv2 = TSVisitor(text2, directive2, "test_parse_directive_body") + + items1 = tsv1.visit(directive1) + items2 = tsv2.visit(directive2) + + assert items1[0].name == "directive" + assert items1[0].args == "Directive title" + assert items1[0].options == dict() + assert items1[0].value == "This directive declares a title and content in a block separated from\nthe definition by an empty new line." + assert items1[0].children == [] + + assert items2[0].name == "directive" + assert items2[0].args == "Directive title" + assert items2[0].options == dict() + assert items2[0].value == "This directive declares a title and content not separated by an empty\nnewline." + assert items2[0].children == [] + +def test_parse_warning_directive(): + data = dedent( + """ + + .. warning:: Title + + The warning directive does not admit a title. + + """ + ) + text = data.strip("\n").encode() + tree = parser.parse(text) + directive = Node(tree.root_node) + tsv = TSVisitor(text, directive, "test_parse_directive_body") + new_node = directive.without_whitespace() + items = tsv.visit(new_node) + + assert items[0].name == "warning" + assert items[0].args == "" + assert items[0].options == dict() + assert items[0].value == "Title The warning directive does not admit a title." + assert items[0].children == [] + def test_parse_space(): [section] = parse( "Element-wise maximum of two arrays, propagating any NaNs.".encode(), diff --git a/papyri/ts.py b/papyri/ts.py index 13ed7140..609279cc 100644 --- a/papyri/ts.py +++ b/papyri/ts.py @@ -65,7 +65,7 @@ class Node: In particular we want to be able to extract whitespace information, which is made hard by tree sitter. - So we intercept iterating through childrens, and if the bytes start/stop + So we intercept iterating through children, and if the bytes start/stop don't match, we insert a fake Whitespace node that has similar api to tree sitter official nodes. """ @@ -81,9 +81,9 @@ def children(self): if not self._with_whitespace: return [Node(n, _with_whitespace=False) for n in self.node.children] - self.node.children current_byte = self.start_byte current_point = self.start_point + new_nodes = [] if self.node.children: for n in self.node.children: @@ -185,7 +185,7 @@ def type(self): class TSVisitor: """ - Tree sitter Visitor, + Tree sitter Visitor Walk the tree sitter tree and convert each node into our kind of internal node. @@ -267,6 +267,7 @@ def visit(self, node): # print(f'ERROR node: {self.as_text(c)!r}, skipping') return [] for c in node.children: + # c= kind = c.type if kind == "::": if acc and isinstance(acc[-1], inline_nodes): @@ -560,7 +561,6 @@ def visit_target(self, node, prev_end=None): # breakpoint() if pp.type == ".." and name.type == "name": return [Unimplemented("untarget", self.as_text(name))] - # print(node.children) return [Unimplemented("target", self.as_text(node))] # def visit_arguments(self, node, prev_end=None): @@ -578,20 +578,30 @@ def visit_inline_target(self, node, prev_end): return [Unimplemented("inline_target", self.as_text(node))] def visit_directive(self, node, prev_end=None): + """ + Main entry point for directives. + + Parses directive arguments, options and content into a MMystDirective + object. + + Parameters + ---------- + node: Node + The directive to parse + prev_end: Unknown + + Returns + ------- + directive: MMystDirective + + """ # TODO: # make it part of the type if a block directive (has, or not), a body. - # directive_name: str - # args0: List[str] - ## TODO : this is likely wrong... - # inner: Optional[Paragraph] - text = self.bytes[node.start_byte : node.end_byte].decode() - if "anaconda" in text: - print("...", text) - is_substitution_definition = False if len(node.children) == 4: + # This directive has a body kinds = [n.type for n in node.children] if tuple(kinds) == ("type", "::", " ", "body"): is_substitution_definition = True @@ -607,9 +617,7 @@ def visit_directive(self, node, prev_end=None): _1, _role, _2 = node.children body_children = [] else: - raise ValueError - assert _1.type == ".." - assert _2.type == "::" + raise ValueError(f"Wrong number of children: {len(node.children)}") if _role.end_point != _2.start_point and not is_substitution_definition: block_data = self.bytes[node.start_byte : node.end_byte].decode() @@ -618,46 +626,71 @@ def visit_directive(self, node, prev_end=None): ) role = self.bytes[_role.start_byte : _role.end_byte].decode() + import itertools groups = itertools.groupby(body_children, lambda x: x.type) groups = [(k, list(v)) for k, v in groups] - if groups and groups[0][0] == "arguments": - arg = list(groups.pop(0)[1]) - assert len(arg) == 1 - argument = self.as_text(arg[0]) - else: - argument = "" - if groups and groups[0][0] == "options": - # to parse - p0 = groups.pop(0) - options = [] - assert len(p0[1]) == 1 - opt_node = p0[1][0] - for field in opt_node.children: - assert field.type == "field" - if len(field.children) == 4: - c1, name, c2, body = field.children - options.append((self.as_text(name), self.as_text(body))) - elif len(field.children) == 3: - c1, name, c2 = field.children - options.append((self.as_text(name), "")) - else: - assert False + if role == "warning": + # The warning directive does not take a title argument; + # however, the contents for the directive may be defined inline + # with the directive name, or as a separate block. + # See https://docutils.sourceforge.io/docs/ref/doctree.html#warning + if len(groups) == 1: + content_node = list(groups[0][1]) + content = self.as_text(content_node[0]) + elif len(groups) == 2: + content_node = [groups[0][1][0], groups[1][1][0]] + content = ( + self.as_text(content_node[0]) + " " + self.as_text(content_node[1]) + ) + else: + raise ValueError(f"{role} directive has no content") - else: - options = [] - if groups and groups[0][0] == "content": - # to parse - content_node = list(groups.pop(0)[1]) - assert len(content_node) == 1 - content = self.as_text(content_node[0]) padding = (content_node[0].start_point[1] - _1.start_point[1]) * " " content = dedent(padding + content) + argument = "" + options = [] + groups = [] else: - content = "" + if groups and groups[0][0] == "arguments": + arg = list(groups.pop(0)[1]) + assert len(arg) == 1 + argument = self.as_text(arg[0]) + else: + argument = "" + + if groups and groups[0][0] == "options": + # to parse + p0 = groups.pop(0) + options = [] + assert len(p0[1]) == 1 + opt_node = p0[1][0] + for field in opt_node.children: + assert field.type == "field" + if len(field.children) == 4: + c1, name, c2, body = field.children + options.append((self.as_text(name), self.as_text(body))) + elif len(field.children) == 3: + c1, name, c2 = field.children + options.append((self.as_text(name), "")) + else: + assert False + else: + options = [] + + if groups and groups[0][0] == "content": + # to parse + content_node = list(groups.pop(0)[1]) + assert len(content_node) == 1 + content = self.as_text(content_node[0]) + padding = (content_node[0].start_point[1] - _1.start_point[1]) * " " + content = dedent(padding + content) + else: + content = "" + assert not groups # todo , we may want to see about the indentation of the content. From fe6a6742a8ea2318274385ba363acb979092de67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melissa=20Weber=20Mendon=C3=A7a?= Date: Mon, 18 Dec 2023 12:16:53 -0300 Subject: [PATCH 2/3] Fix linting and style --- papyri/tests/test_parse.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/papyri/tests/test_parse.py b/papyri/tests/test_parse.py index c2d99511..47696eb0 100644 --- a/papyri/tests/test_parse.py +++ b/papyri/tests/test_parse.py @@ -73,15 +73,22 @@ def test_parse_directive_body(): assert items1[0].name == "directive" assert items1[0].args == "Directive title" assert items1[0].options == dict() - assert items1[0].value == "This directive declares a title and content in a block separated from\nthe definition by an empty new line." + assert ( + items1[0].value + == "This directive declares a title and content in a block separated from\nthe definition by an empty new line." + ) assert items1[0].children == [] assert items2[0].name == "directive" assert items2[0].args == "Directive title" assert items2[0].options == dict() - assert items2[0].value == "This directive declares a title and content not separated by an empty\nnewline." + assert ( + items2[0].value + == "This directive declares a title and content not separated by an empty\nnewline." + ) assert items2[0].children == [] + def test_parse_warning_directive(): data = dedent( """ @@ -105,6 +112,7 @@ def test_parse_warning_directive(): assert items[0].value == "Title The warning directive does not admit a title." assert items[0].children == [] + def test_parse_space(): [section] = parse( "Element-wise maximum of two arrays, propagating any NaNs.".encode(), From b69175df023bf96c87c3ecbb07dbf8ccbd8ac79e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melissa=20Weber=20Mendon=C3=A7a?= Date: Tue, 19 Dec 2023 10:55:48 -0300 Subject: [PATCH 3/3] Import parser from ts --- papyri/tests/test_parse.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/papyri/tests/test_parse.py b/papyri/tests/test_parse.py index 47696eb0..66a5c1c3 100644 --- a/papyri/tests/test_parse.py +++ b/papyri/tests/test_parse.py @@ -1,16 +1,9 @@ from textwrap import dedent -from pathlib import Path import pytest from papyri import errors -from papyri.ts import parse, Node, TSVisitor -from tree_sitter import Language, Parser - -parser = Parser() -pth = str(Path(__file__).parent.parent / "rst.so") -RST = Language(pth, "rst") -parser.set_language(RST) +from papyri.ts import parse, Node, TSVisitor, parser # @pytest.mark.xfail(strict=True)