Fixes incorrect parsing of Warning directive (#354)

The Warning directive does not admit a title (see https://docutils.sourceforge.io/docs/ref/doctree.html#warning). Also improves tests for directive parsing.
jupyter · Dec 20, 2023 · b7cb92f · b7cb92f
2 parents 0a0d3e6 + b69175d
commit b7cb92f
Show file tree

Hide file tree

Showing 4 changed files with 165 additions and 49 deletions.
diff --git a/papyri/gen.py b/papyri/gen.py
@@ -1385,7 +1385,8 @@ def collect_narrative_docs(self):
         title_map = {}
         blbs = {}
         with self.progress() as p2:
-            task = p2.add_task("Parsing narative", total=len(files))
+            task = p2.add_task("Parsing narrative", total=len(files))
+
             for p in files:
                 p2.update(task, description=compress_user(str(p)).ljust(7))
                 p2.advance(task)
@@ -1983,9 +1984,9 @@ def extract_docstring(
 
         if item_docstring is None and not isinstance(target_item, ModuleType):
             return None, [], api_object
-
         elif item_docstring is None and isinstance(target_item, ModuleType):
             item_docstring = """This module has no documentation"""
+
         try:
             sections = ts.parse(item_docstring.encode(), qa)
         except (AssertionError, NotImplementedError) as e:
@@ -2063,6 +2064,7 @@ def collect_api_docs(self, root: str, limit_to: List[str]) -> None:
             )
 
         collected = {k: v for k, v in collected.items() if k not in excluded}
+
         if limit_to:
             non_existinsing = [k for k in limit_to if k not in collected]
             if non_existinsing:
@@ -2076,6 +2078,7 @@ def collect_api_docs(self, root: str, limit_to: List[str]) -> None:
             self.log.info("DEV: regenerating docs only for")
             for k, v in collected.items():
                 self.log.info(f"    {k}:{v}")
+
         aliases: Dict[FullQual, Cannonical]
         aliases, not_found = collector.compute_aliases()
         rev_aliases: Dict[Cannonical, FullQual] = {v: k for k, v in aliases.items()}

diff --git a/papyri/tests/test_ascii_expected.py b/papyri/tests/test_ascii_expected.py
@@ -12,6 +12,8 @@
 
 
 def _get_result_for_name(name):
+    # WARNING: This test only works if the papyri and numpy docs are generated and
+    # ingested first
     gstore = GraphStore(ingest_dir, {})
     key = next(iter(gstore.glob((None, None, "module", name))))
 

diff --git a/papyri/tests/test_parse.py b/papyri/tests/test_parse.py
@@ -3,7 +3,7 @@
 import pytest
 
 from papyri import errors
-from papyri.ts import parse
+from papyri.ts import parse, Node, TSVisitor, parser
 
 
 # @pytest.mark.xfail(strict=True)
@@ -17,7 +17,6 @@ def test_parse_space_in_directive_section():
         should raise/warn in papyri.
         It may depends on the tree-sitter rst version.
 
-
     """
     )
     pytest.raises(
@@ -28,6 +27,85 @@ def test_parse_space_in_directive_section():
     )
 
 
+def test_parse_directive_body():
+    data1 = dedent(
+        """
+
+    .. directive:: Directive title
+
+        This directive declares a title and content in a block separated from
+        the definition by an empty new line.
+
+    """
+    )
+    data2 = dedent(
+        """
+
+    .. directive:: Directive title
+        This directive declares a title and content not separated by an empty
+        newline.
+
+    """
+    )
+
+    text1 = data1.strip("\n").encode()
+    text2 = data2.strip("\n").encode()
+
+    tree1 = parser.parse(text1)
+    tree2 = parser.parse(text2)
+
+    directive1 = Node(tree1.root_node).without_whitespace()
+    directive2 = Node(tree2.root_node).without_whitespace()
+
+    tsv1 = TSVisitor(text1, directive1, "test_parse_directive_body")
+    tsv2 = TSVisitor(text2, directive2, "test_parse_directive_body")
+
+    items1 = tsv1.visit(directive1)
+    items2 = tsv2.visit(directive2)
+
+    assert items1[0].name == "directive"
+    assert items1[0].args == "Directive title"
+    assert items1[0].options == dict()
+    assert (
+        items1[0].value
+        == "This directive declares a title and content in a block separated from\nthe definition by an empty new line."
+    )
+    assert items1[0].children == []
+
+    assert items2[0].name == "directive"
+    assert items2[0].args == "Directive title"
+    assert items2[0].options == dict()
+    assert (
+        items2[0].value
+        == "This directive declares a title and content not separated by an empty\nnewline."
+    )
+    assert items2[0].children == []
+
+
+def test_parse_warning_directive():
+    data = dedent(
+        """
+
+    .. warning:: Title
+
+        The warning directive does not admit a title.
+
+    """
+    )
+    text = data.strip("\n").encode()
+    tree = parser.parse(text)
+    directive = Node(tree.root_node)
+    tsv = TSVisitor(text, directive, "test_parse_directive_body")
+    new_node = directive.without_whitespace()
+    items = tsv.visit(new_node)
+
+    assert items[0].name == "warning"
+    assert items[0].args == ""
+    assert items[0].options == dict()
+    assert items[0].value == "Title The warning directive does not admit a title."
+    assert items[0].children == []
+
+
 def test_parse_space():
     [section] = parse(
         "Element-wise maximum of two arrays, propagating any NaNs.".encode(),

diff --git a/papyri/ts.py b/papyri/ts.py
@@ -65,7 +65,7 @@ class Node:
     In particular we want to be able to extract whitespace information,
     which is made hard by tree sitter.
 
-    So we intercept iterating through childrens, and if the bytes start/stop
+    So we intercept iterating through children, and if the bytes start/stop
     don't match, we insert a fake Whitespace node that has similar api to tree
     sitter official nodes.
     """
@@ -81,9 +81,9 @@ def children(self):
         if not self._with_whitespace:
             return [Node(n, _with_whitespace=False) for n in self.node.children]
 
-        self.node.children
         current_byte = self.start_byte
         current_point = self.start_point
+
         new_nodes = []
         if self.node.children:
             for n in self.node.children:
@@ -185,7 +185,7 @@ def type(self):
 
 class TSVisitor:
     """
-    Tree sitter Visitor,
+    Tree sitter Visitor
 
     Walk the tree sitter tree and convert each node into our kind of internal node.
 
@@ -267,6 +267,7 @@ def visit(self, node):
             # print(f'ERROR node: {self.as_text(c)!r}, skipping')
             return []
         for c in node.children:
+            # c=<ts.Node directive>
             kind = c.type
             if kind == "::":
                 if acc and isinstance(acc[-1], inline_nodes):
@@ -560,7 +561,6 @@ def visit_target(self, node, prev_end=None):
             # breakpoint()
             if pp.type == ".." and name.type == "name":
                 return [Unimplemented("untarget", self.as_text(name))]
-        # print(node.children)
         return [Unimplemented("target", self.as_text(node))]
 
     # def visit_arguments(self, node, prev_end=None):
@@ -578,20 +578,30 @@ def visit_inline_target(self, node, prev_end):
         return [Unimplemented("inline_target", self.as_text(node))]
 
     def visit_directive(self, node, prev_end=None):
+        """
+        Main entry point for directives.
+
+        Parses directive arguments, options and content into a MMystDirective
+        object.
+
+        Parameters
+        ----------
+        node: Node
+            The directive to parse
+        prev_end: Unknown
+
+        Returns
+        -------
+        directive: MMystDirective
+
+        """
         # TODO:
         # make it part of the type if a block directive (has, or not), a body.
 
-        # directive_name: str
-        # args0: List[str]
-        ## TODO : this is likely wrong...
-        # inner: Optional[Paragraph]
-        text = self.bytes[node.start_byte : node.end_byte].decode()
-        if "anaconda" in text:
-            print("...", text)
-
         is_substitution_definition = False
 
         if len(node.children) == 4:
+            # This directive has a body
             kinds = [n.type for n in node.children]
             if tuple(kinds) == ("type", "::", " ", "body"):
                 is_substitution_definition = True
@@ -607,9 +617,7 @@ def visit_directive(self, node, prev_end=None):
             _1, _role, _2 = node.children
             body_children = []
         else:
-            raise ValueError
-            assert _1.type == ".."
-            assert _2.type == "::"
+            raise ValueError(f"Wrong number of children: {len(node.children)}")
 
         if _role.end_point != _2.start_point and not is_substitution_definition:
             block_data = self.bytes[node.start_byte : node.end_byte].decode()
@@ -618,46 +626,71 @@ def visit_directive(self, node, prev_end=None):
             )
 
         role = self.bytes[_role.start_byte : _role.end_byte].decode()
+
         import itertools
 
         groups = itertools.groupby(body_children, lambda x: x.type)
         groups = [(k, list(v)) for k, v in groups]
 
-        if groups and groups[0][0] == "arguments":
-            arg = list(groups.pop(0)[1])
-            assert len(arg) == 1
-            argument = self.as_text(arg[0])
-        else:
-            argument = ""
-        if groups and groups[0][0] == "options":
-            # to parse
-            p0 = groups.pop(0)
-            options = []
-            assert len(p0[1]) == 1
-            opt_node = p0[1][0]
-            for field in opt_node.children:
-                assert field.type == "field"
-                if len(field.children) == 4:
-                    c1, name, c2, body = field.children
-                    options.append((self.as_text(name), self.as_text(body)))
-                elif len(field.children) == 3:
-                    c1, name, c2 = field.children
-                    options.append((self.as_text(name), ""))
-                else:
-                    assert False
+        if role == "warning":
+            # The warning directive does not take a title argument;
+            # however, the contents for the directive may be defined inline
+            # with the directive name, or as a separate block.
+            # See https://docutils.sourceforge.io/docs/ref/doctree.html#warning
+            if len(groups) == 1:
+                content_node = list(groups[0][1])
+                content = self.as_text(content_node[0])
+            elif len(groups) == 2:
+                content_node = [groups[0][1][0], groups[1][1][0]]
+                content = (
+                    self.as_text(content_node[0]) + " " + self.as_text(content_node[1])
+                )
+            else:
+                raise ValueError(f"{role} directive has no content")
 
-        else:
-            options = []
-        if groups and groups[0][0] == "content":
-            # to parse
-            content_node = list(groups.pop(0)[1])
-            assert len(content_node) == 1
-            content = self.as_text(content_node[0])
             padding = (content_node[0].start_point[1] - _1.start_point[1]) * " "
             content = dedent(padding + content)
+            argument = ""
+            options = []
+            groups = []
 
         else:
-            content = ""
+            if groups and groups[0][0] == "arguments":
+                arg = list(groups.pop(0)[1])
+                assert len(arg) == 1
+                argument = self.as_text(arg[0])
+            else:
+                argument = ""
+
+            if groups and groups[0][0] == "options":
+                # to parse
+                p0 = groups.pop(0)
+                options = []
+                assert len(p0[1]) == 1
+                opt_node = p0[1][0]
+                for field in opt_node.children:
+                    assert field.type == "field"
+                    if len(field.children) == 4:
+                        c1, name, c2, body = field.children
+                        options.append((self.as_text(name), self.as_text(body)))
+                    elif len(field.children) == 3:
+                        c1, name, c2 = field.children
+                        options.append((self.as_text(name), ""))
+                    else:
+                        assert False
+            else:
+                options = []
+
+            if groups and groups[0][0] == "content":
+                # to parse
+                content_node = list(groups.pop(0)[1])
+                assert len(content_node) == 1
+                content = self.as_text(content_node[0])
+                padding = (content_node[0].start_point[1] - _1.start_point[1]) * " "
+                content = dedent(padding + content)
+            else:
+                content = ""
+
         assert not groups
         # todo , we may want to see about the indentation of the content.