Skip to content

Commit

Permalink
Fixes incorrect parsing of Warning directive (#354)
Browse files Browse the repository at this point in the history
The Warning directive does not accept a title argument; see
https://docutils.sourceforge.io/docs/ref/doctree.html#warning

Also improves tests for directive parsing.
  • Loading branch information
Carreau authored Dec 20, 2023
2 parents 0a0d3e6 + b69175d commit b7cb92f
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 49 deletions.
7 changes: 5 additions & 2 deletions papyri/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1385,7 +1385,8 @@ def collect_narrative_docs(self):
title_map = {}
blbs = {}
with self.progress() as p2:
task = p2.add_task("Parsing narative", total=len(files))
task = p2.add_task("Parsing narrative", total=len(files))

for p in files:
p2.update(task, description=compress_user(str(p)).ljust(7))
p2.advance(task)
Expand Down Expand Up @@ -1983,9 +1984,9 @@ def extract_docstring(

if item_docstring is None and not isinstance(target_item, ModuleType):
return None, [], api_object

elif item_docstring is None and isinstance(target_item, ModuleType):
item_docstring = """This module has no documentation"""

try:
sections = ts.parse(item_docstring.encode(), qa)
except (AssertionError, NotImplementedError) as e:
Expand Down Expand Up @@ -2063,6 +2064,7 @@ def collect_api_docs(self, root: str, limit_to: List[str]) -> None:
)

collected = {k: v for k, v in collected.items() if k not in excluded}

if limit_to:
non_existinsing = [k for k in limit_to if k not in collected]
if non_existinsing:
Expand All @@ -2076,6 +2078,7 @@ def collect_api_docs(self, root: str, limit_to: List[str]) -> None:
self.log.info("DEV: regenerating docs only for")
for k, v in collected.items():
self.log.info(f" {k}:{v}")

aliases: Dict[FullQual, Cannonical]
aliases, not_found = collector.compute_aliases()
rev_aliases: Dict[Cannonical, FullQual] = {v: k for k, v in aliases.items()}
Expand Down
2 changes: 2 additions & 0 deletions papyri/tests/test_ascii_expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@


def _get_result_for_name(name):
# WARNING: This test only works if the papyri and numpy docs are generated and
# ingested first
gstore = GraphStore(ingest_dir, {})
key = next(iter(gstore.glob((None, None, "module", name))))

Expand Down
82 changes: 80 additions & 2 deletions papyri/tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from papyri import errors
from papyri.ts import parse
from papyri.ts import parse, Node, TSVisitor, parser


# @pytest.mark.xfail(strict=True)
Expand All @@ -17,7 +17,6 @@ def test_parse_space_in_directive_section():
should raise/warn in papyri.
It may depends on the tree-sitter rst version.
"""
)
pytest.raises(
Expand All @@ -28,6 +27,85 @@ def test_parse_space_in_directive_section():
)


def test_parse_directive_body():
    """
    Check that a generic directive reports its title as ``args`` and its
    indented body as ``value``, whether or not an empty line separates the
    two.
    """
    # The body has to be indented relative to the ``..`` marker; otherwise it
    # is a sibling paragraph rather than directive content, and the ``value``
    # assertions below cannot hold.
    separated = dedent(
        """
    .. directive:: Directive title

        This directive declares a title and content in a block separated from
        the definition by an empty new line.
    """
    )
    adjacent = dedent(
        """
    .. directive:: Directive title
        This directive declares a title and content not separated by an empty
        newline.
    """
    )

    cases = [
        (
            separated,
            "This directive declares a title and content in a block separated from\n"
            "the definition by an empty new line.",
        ),
        (
            adjacent,
            "This directive declares a title and content not separated by an empty\n"
            "newline.",
        ),
    ]

    for data, expected_value in cases:
        text = data.strip("\n").encode()
        # Drop the synthetic whitespace nodes before visiting the tree.
        root = Node(parser.parse(text).root_node).without_whitespace()
        items = TSVisitor(text, root, "test_parse_directive_body").visit(root)

        assert items[0].name == "directive"
        assert items[0].args == "Directive title"
        assert items[0].options == {}
        assert items[0].value == expected_value
        assert items[0].children == []


def test_parse_warning_directive():
    """
    Check that text following ``.. warning::`` is folded into the directive
    content: ``args`` stays empty and the inline text is joined to the
    indented body with a single space.
    """
    # The body is indented relative to the ``..`` marker so that it is parsed
    # as directive content rather than as a separate paragraph.
    data = dedent(
        """
    .. warning:: Title
        The warning directive does not admit a title.
    """
    )
    text = data.strip("\n").encode()
    # Build and run the visitor on the same whitespace-free tree, as the
    # generic directive test does.
    root = Node(parser.parse(text).root_node).without_whitespace()
    # The third argument presumably labels the source in diagnostics (TODO
    # confirm); use this test's own name instead of the sibling test's.
    tsv = TSVisitor(text, root, "test_parse_warning_directive")
    items = tsv.visit(root)

    assert items[0].name == "warning"
    assert items[0].args == ""
    assert items[0].options == {}
    assert items[0].value == "Title The warning directive does not admit a title."
    assert items[0].children == []


def test_parse_space():
[section] = parse(
"Element-wise maximum of two arrays, propagating any NaNs.".encode(),
Expand Down
123 changes: 78 additions & 45 deletions papyri/ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class Node:
In particular we want to be able to extract whitespace information,
which is made hard by tree sitter.
So we intercept iterating through childrens, and if the bytes start/stop
So we intercept iterating through children, and if the bytes start/stop
don't match, we insert a fake Whitespace node that has similar api to tree
sitter official nodes.
"""
Expand All @@ -81,9 +81,9 @@ def children(self):
if not self._with_whitespace:
return [Node(n, _with_whitespace=False) for n in self.node.children]

self.node.children
current_byte = self.start_byte
current_point = self.start_point

new_nodes = []
if self.node.children:
for n in self.node.children:
Expand Down Expand Up @@ -185,7 +185,7 @@ def type(self):

class TSVisitor:
"""
Tree sitter Visitor,
Tree sitter Visitor
Walk the tree sitter tree and convert each node into our kind of internal node.
Expand Down Expand Up @@ -267,6 +267,7 @@ def visit(self, node):
# print(f'ERROR node: {self.as_text(c)!r}, skipping')
return []
for c in node.children:
# c=<ts.Node directive>
kind = c.type
if kind == "::":
if acc and isinstance(acc[-1], inline_nodes):
Expand Down Expand Up @@ -560,7 +561,6 @@ def visit_target(self, node, prev_end=None):
# breakpoint()
if pp.type == ".." and name.type == "name":
return [Unimplemented("untarget", self.as_text(name))]
# print(node.children)
return [Unimplemented("target", self.as_text(node))]

# def visit_arguments(self, node, prev_end=None):
Expand All @@ -578,20 +578,30 @@ def visit_inline_target(self, node, prev_end):
return [Unimplemented("inline_target", self.as_text(node))]

def visit_directive(self, node, prev_end=None):
"""
Main entry point for directives.
Parses directive arguments, options and content into a MMystDirective
object.
Parameters
----------
node: Node
The directive to parse
prev_end: Unknown
Returns
-------
directive: MMystDirective
"""
# TODO:
# make it part of the type if a block directive (has, or not), a body.

# directive_name: str
# args0: List[str]
## TODO : this is likely wrong...
# inner: Optional[Paragraph]
text = self.bytes[node.start_byte : node.end_byte].decode()
if "anaconda" in text:
print("...", text)

is_substitution_definition = False

if len(node.children) == 4:
# This directive has a body
kinds = [n.type for n in node.children]
if tuple(kinds) == ("type", "::", " ", "body"):
is_substitution_definition = True
Expand All @@ -607,9 +617,7 @@ def visit_directive(self, node, prev_end=None):
_1, _role, _2 = node.children
body_children = []
else:
raise ValueError
assert _1.type == ".."
assert _2.type == "::"
raise ValueError(f"Wrong number of children: {len(node.children)}")

if _role.end_point != _2.start_point and not is_substitution_definition:
block_data = self.bytes[node.start_byte : node.end_byte].decode()
Expand All @@ -618,46 +626,71 @@ def visit_directive(self, node, prev_end=None):
)

role = self.bytes[_role.start_byte : _role.end_byte].decode()

import itertools

groups = itertools.groupby(body_children, lambda x: x.type)
groups = [(k, list(v)) for k, v in groups]

if groups and groups[0][0] == "arguments":
arg = list(groups.pop(0)[1])
assert len(arg) == 1
argument = self.as_text(arg[0])
else:
argument = ""
if groups and groups[0][0] == "options":
# to parse
p0 = groups.pop(0)
options = []
assert len(p0[1]) == 1
opt_node = p0[1][0]
for field in opt_node.children:
assert field.type == "field"
if len(field.children) == 4:
c1, name, c2, body = field.children
options.append((self.as_text(name), self.as_text(body)))
elif len(field.children) == 3:
c1, name, c2 = field.children
options.append((self.as_text(name), ""))
else:
assert False
if role == "warning":
# The warning directive does not take a title argument;
# however, the contents for the directive may be defined inline
# with the directive name, or as a separate block.
# See https://docutils.sourceforge.io/docs/ref/doctree.html#warning
if len(groups) == 1:
content_node = list(groups[0][1])
content = self.as_text(content_node[0])
elif len(groups) == 2:
content_node = [groups[0][1][0], groups[1][1][0]]
content = (
self.as_text(content_node[0]) + " " + self.as_text(content_node[1])
)
else:
raise ValueError(f"{role} directive has no content")

else:
options = []
if groups and groups[0][0] == "content":
# to parse
content_node = list(groups.pop(0)[1])
assert len(content_node) == 1
content = self.as_text(content_node[0])
padding = (content_node[0].start_point[1] - _1.start_point[1]) * " "
content = dedent(padding + content)
argument = ""
options = []
groups = []

else:
content = ""
if groups and groups[0][0] == "arguments":
arg = list(groups.pop(0)[1])
assert len(arg) == 1
argument = self.as_text(arg[0])
else:
argument = ""

if groups and groups[0][0] == "options":
# to parse
p0 = groups.pop(0)
options = []
assert len(p0[1]) == 1
opt_node = p0[1][0]
for field in opt_node.children:
assert field.type == "field"
if len(field.children) == 4:
c1, name, c2, body = field.children
options.append((self.as_text(name), self.as_text(body)))
elif len(field.children) == 3:
c1, name, c2 = field.children
options.append((self.as_text(name), ""))
else:
assert False
else:
options = []

if groups and groups[0][0] == "content":
# to parse
content_node = list(groups.pop(0)[1])
assert len(content_node) == 1
content = self.as_text(content_node[0])
padding = (content_node[0].start_point[1] - _1.start_point[1]) * " "
content = dedent(padding + content)
else:
content = ""

assert not groups
# todo , we may want to see about the indentation of the content.

Expand Down

0 comments on commit b7cb92f

Please sign in to comment.