Skip to content

Commit

Permalink
doccursor: stub our shiny new cursor class
Browse files Browse the repository at this point in the history
As a 1st step, we make a _very_ shallow wrapper around the Clang cursor.
There is of course no point to it yet, but with this simple trick we are
now using `DocCursor`s everywhere past `parse()`.

The only effect of this commit is to worsen performance very slightly
due to needless invocation of path specific code on all cursors. Running
the full test suite is now ~9% slower on my machine. We shall revisit
this later when it actually matters.
  • Loading branch information
BrunoMSantos committed Nov 16, 2023
1 parent 92c4f36 commit 9bf1c4d
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 39 deletions.
84 changes: 84 additions & 0 deletions src/hawkmoth/doccursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@
SourceRange,
)

def _get_meta(cursor):
return {
'line': cursor.comment.extent.start.line if cursor.comment else '',
'cursor.kind': cursor.kind,
'cursor.displayname': cursor.displayname,
'cursor.spelling': cursor.spelling,
}

# Workaround for clang cursor.get_tokens() being unreliable for cursors whose
# extent contains macro expansions. The result may be empty or contain bogus
# tokens, depending on the case.
Expand Down Expand Up @@ -453,3 +461,79 @@ def _get_inheritance(cursor):
inherited.append(f'{pad(access_spec)}{child.type.spelling}')

return ': ' + ', '.join(inherited) if len(inherited) > 0 else None


class DocCursor:
    """Documentation centric wrapper for Clang's own ``Cursor``.

    This class abstracts a documentation worthy cursor so the user can query
    relevant bits for documentation purposes, while hiding the complications
    behind Clang's AST traversal and the extraction of said bits of
    information.

    Technically, this class can hold any Clang cursor within itself, but it
    won't expose any relevant information for those.
    """

    def __init__(self, domain=None, cursor=None, comments=None):
        """Wrap a Clang cursor.

        :param domain: Documentation domain associated with this cursor. It
            is handed down to the children, except that ``get_children()``
            switches ``'cpp'`` to ``'c'`` for ``extern "C"`` blocks.
        :param cursor: The Clang cursor to wrap. Despite the ``None``
            default, a cursor is required: its attributes are read right
            away.
        :param comments: Mapping of cursor hashes to comments. The same
            mapping is handed down to the children.
        """
        self._comments = comments if comments else {}
        self._cc = cursor

        self.domain = domain
        self.hash = self._cc.hash
        self.kind = self._cc.kind

        # The comment attached to this cursor, or None if it has none.
        self.comment = self._comments.get(self.hash)

        # TODO:
        # We mimic everything we need from Clang's cursor for a drop in
        # replacement. Later these will likely be more intelligent versions
        # that incorporate logic from the helper parser functions.
        self.access_specifier = self._cc.access_specifier
        self.displayname = self._cc.displayname
        if self.kind == CursorKind.ENUM_DECL:
            self.enum_type = self._cc.enum_type
        if self.kind == CursorKind.ENUM_CONSTANT_DECL:
            # Only expose the enumerator value if it's explicitly set in
            # source, i.e. if there is a `=` among the cursor's tokens.
            if any(t.spelling == '=' for t in _cursor_get_tokens(self._cc)):
                self.enum_value = self._cc.enum_value
            else:
                self.enum_value = None
        self.exception_specification_kind = self._cc.exception_specification_kind
        self.extent = self._cc.extent
        self.is_anonymous = self._cc.is_anonymous
        self.is_const_method = self._cc.is_const_method
        self.is_default_method = self._cc.is_default_method
        self.is_pure_virtual_method = self._cc.is_pure_virtual_method
        self.is_scoped_enum = self._cc.is_scoped_enum
        self.is_static_method = self._cc.is_static_method
        self.is_virtual_method = self._cc.is_virtual_method
        # Error reporting code reads `location.file.name` and `location.line`
        # from the cursors it is given, so the wrapper must mirror this
        # attribute too or those paths fail with an AttributeError.
        self.location = self._cc.location
        self.result_type = self._cc.result_type
        self.semantic_parent = self._cc.semantic_parent
        self.spelling = self._cc.spelling
        self.storage_class = self._cc.storage_class
        self.translation_unit = self._cc.translation_unit
        self.type = self._cc.type

    def __hash__(self):
        """Return the hash of the wrapped Clang cursor."""
        return self.hash

    def get_children(self):
        """Get children cursors.

        Yield one ``DocCursor`` per child of the wrapped Clang cursor. The
        children share this cursor's comments mapping and its domain, except
        for the children of an ``extern "C"`` block found in the ``'cpp'``
        domain, which get the ``'c'`` domain instead.
        """
        domain = self.domain

        # Identify `extern "C"` blocks and change domain accordingly. For some
        # reason, the Python bindings don't return the cursor kind LINKAGE_SPEC
        # as one would expect, so we need to do it the hard way.
        if domain == 'cpp' and self.kind == CursorKind.UNEXPOSED_DECL:
            # Tokenize the wrapped Clang cursor, same as in `__init__()`,
            # rather than the wrapper itself.
            tokens = _cursor_get_tokens(self._cc)
            ntoken = next(tokens, None)
            if ntoken and ntoken.spelling == 'extern':
                ntoken = next(tokens, None)
                if ntoken and ntoken.spelling == '"C"':
                    domain = 'c'

        for c in self._cc.get_children():
            yield DocCursor(domain=domain, cursor=c, comments=self._comments)
59 changes: 20 additions & 39 deletions src/hawkmoth/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
from hawkmoth.doccursor import (
CursorKind,
TokenKind,
DocCursor,
_get_meta,
_cursor_get_tokens,
_function_fixup,
_get_macro_args,
Expand Down Expand Up @@ -197,21 +199,13 @@ def _comment_extract(tu):

return top_level_comments, comments

def _get_meta(comment, cursor=None):
meta = {'line': comment.extent.start.line}
if cursor:
meta['cursor.kind'] = cursor.kind
meta['cursor.displayname'] = cursor.displayname
meta['cursor.spelling'] = cursor.spelling

return meta

def _recursive_parse(domain, comments, errors, cursor, nest):
comment = comments[cursor.hash]
def _recursive_parse(errors, cursor, nest):
domain = cursor.domain
comment = cursor.comment
name = cursor.spelling
ttype = cursor.type.spelling
text = comment.spelling
meta = _get_meta(comment, cursor)
meta = _get_meta(cursor)

if cursor.kind == CursorKind.MACRO_DEFINITION:
# FIXME: check args against comment
Expand Down Expand Up @@ -281,21 +275,14 @@ def _recursive_parse(domain, comments, errors, cursor, nest):
decl_name=decl_name, meta=meta)

for c in cursor.get_children():
if c.hash in comments:
ds.add_children(_recursive_parse(domain, comments,
errors, c, nest + 1))
if c.comment:
ds.add_children(_recursive_parse(errors, c, nest + 1))

return [ds]

elif cursor.kind == CursorKind.ENUM_CONSTANT_DECL:
# Show enumerator value if it's explicitly set in source
if '=' in [t.spelling for t in _cursor_get_tokens(cursor)]:
value = cursor.enum_value
else:
value = None

ds = docstring.EnumeratorDocstring(domain=domain, name=name,
value=value, text=text,
value=cursor.enum_value, text=text,
meta=meta, nest=nest)

return [ds]
Expand Down Expand Up @@ -335,7 +322,7 @@ def _clang_diagnostics(diagnostics, errors):
errors.append(ParserError(ErrorLevel(diag.severity), filename,
diag.location.line, diag.spelling))

def _parse_undocumented_block(domain, comments, errors, cursor, nest):
def _parse_undocumented_block(errors, cursor, nest):
"""Parse undocumented blocks.
Some blocks define plenty of children that may be documented themselves
Expand All @@ -345,8 +332,7 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest):
ret = []

# Identify `extern "C"` and `extern "C++"` blocks and recursively parse
# their contents. Only `extern "C"` is of any relevance in choosing a
# different domain.
# their contents.
# For some reason, the Python bindings don't return the cursor kind
# LINKAGE_SPEC as one would expect, so we need to do it the hard way.
if cursor.kind == CursorKind.UNEXPOSED_DECL:
Expand All @@ -358,20 +344,16 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest):
if not ntoken:
return ret

if ntoken.spelling == '"C"':
domain = 'c'
elif ntoken.spelling == '"C++"':
domain = 'cpp'
else:
if ntoken.spelling not in ['"C"', '"C++"']:
message = f'unhandled `extern {ntoken.spelling}` block will mask all children'
errors.append(ParserError(ErrorLevel.WARNING,
cursor.location.file.name,
cursor.location.line, message))
return ret

for c in cursor.get_children():
if c.hash in comments:
ret.extend(_recursive_parse(domain, comments, errors, c, nest))
if c.comment:
ret.extend(_recursive_parse(errors, c, nest))

return ret

Expand Down Expand Up @@ -426,16 +408,15 @@ def parse(filename, domain=None, clang_args=None):

for comment in top_level_comments:
text = comment.spelling
meta = _get_meta(comment)
meta = {'line': comment.extent.start.line}
ds = docstring.TextDocstring(text=text, meta=meta)
result.add_child(ds)

for cursor in tu.cursor.get_children():
if cursor.hash in comments:
result.add_children(_recursive_parse(domain, comments,
errors, cursor, 0))
for cc in tu.cursor.get_children():
cursor = DocCursor(domain=domain, cursor=cc, comments=comments)
if cursor.comment:
result.add_children(_recursive_parse(errors, cursor, 0))
else:
result.add_children(_parse_undocumented_block(domain, comments,
errors, cursor, 0))
result.add_children(_parse_undocumented_block(errors, cursor, 0))

return result, errors

0 comments on commit 9bf1c4d

Please sign in to comment.