From 9bf1c4db6fcff112d146045dfb4338d8e4ac039b Mon Sep 17 00:00:00 2001 From: Bruno Santos <brunomanuelsantos@tecnico.ulisboa.pt> Date: Mon, 13 Nov 2023 19:04:44 +0000 Subject: [PATCH] doccursor: stub our shiny new cursor class As a 1st step, we make a _very_ shallow wrapper around the Clang cursor. There is of course no point to it yet, but with this simple trick we are now using `DocCursor`s everywhere past `parse()`. The only effect of this commit is to worsen performance very slightly due to needless invocation of path specific code on all cursors. Running the full test suite is now ~9% slower on my machine. We shall revisit this later when it actually matters. --- src/hawkmoth/doccursor.py | 84 +++++++++++++++++++++++++++++++++++++++ src/hawkmoth/parser.py | 59 ++++++++++----------------- 2 files changed, 104 insertions(+), 39 deletions(-) diff --git a/src/hawkmoth/doccursor.py b/src/hawkmoth/doccursor.py index 0de8d69e..088b3404 100644 --- a/src/hawkmoth/doccursor.py +++ b/src/hawkmoth/doccursor.py @@ -13,6 +13,14 @@ SourceRange, ) +def _get_meta(cursor): + return { + 'line': cursor.comment.extent.start.line if cursor.comment else '', + 'cursor.kind': cursor.kind, + 'cursor.displayname': cursor.displayname, + 'cursor.spelling': cursor.spelling, + } + # Workaround for clang cursor.get_tokens() being unreliable for cursors whose # extent contains macro expansions. The result may be empty or contain bogus # tokens, depending on the case. @@ -453,3 +461,79 @@ def _get_inheritance(cursor): inherited.append(f'{pad(access_spec)}{child.type.spelling}') return ': ' + ', '.join(inherited) if len(inherited) > 0 else None + + +class DocCursor: + """Documentation centric wrapper for Clang's own ``Cursor``. + + This class abstracts a documentation worthy cursor so the user can query + relevant bits for documentation purpose, but otherwise hide all the + complications behind Clang's AST traversal and extraction of said bits of + information. 
+ + Technically, this class can hold any Clang cursor within itself, but it + won't expose any relevant information for those. + """ + + def __init__(self, domain=None, cursor=None, comments=None): + self._comments = comments if comments else {} + self._cc = cursor + + self.domain = domain + self.hash = self._cc.hash + self.kind = self._cc.kind + + if self.hash in self._comments: + self.comment = self._comments[self.hash] + else: + self.comment = None + + # TODO: + # We mimic everything we need from Clang's cursor for a drop in + # replacement. Later these will likely be more intelligent versions + # that incorporate logic from the helper parser functions. + self.access_specifier = self._cc.access_specifier + self.displayname = self._cc.displayname + if self.kind == CursorKind.ENUM_DECL: + self.enum_type = self._cc.enum_type + if self.kind == CursorKind.ENUM_CONSTANT_DECL: + if '=' in [t.spelling for t in _cursor_get_tokens(self._cc)]: + self.enum_value = self._cc.enum_value + else: + self.enum_value = None + self.exception_specification_kind = self._cc.exception_specification_kind + self.extent = self._cc.extent + self.is_anonymous = self._cc.is_anonymous + self.is_const_method = self._cc.is_const_method + self.is_default_method = self._cc.is_default_method + self.is_pure_virtual_method = self._cc.is_pure_virtual_method + self.is_scoped_enum = self._cc.is_scoped_enum + self.is_static_method = self._cc.is_static_method + self.is_virtual_method = self._cc.is_virtual_method + self.result_type = self._cc.result_type + self.semantic_parent = self._cc.semantic_parent + self.spelling = self._cc.spelling + self.storage_class = self._cc.storage_class + self.translation_unit = self._cc.translation_unit + self.type = self._cc.type + + def __hash__(self): + return self.hash + + def get_children(self): + """Get children cursors.""" + domain = self.domain + + # Identify `extern "C"` blocks and change domain accordingly. 
For some + # reason, the Python bindings don't return the cursor kind LINKAGE_SPEC + # as one would expect, so we need to do it the hard way. + if domain == 'cpp' and self.kind == CursorKind.UNEXPOSED_DECL: + tokens = _cursor_get_tokens(self) + ntoken = next(tokens, None) + if ntoken and ntoken.spelling == 'extern': + ntoken = next(tokens, None) + if ntoken and ntoken.spelling == '"C"': + domain = 'c' + + for c in self._cc.get_children(): + yield DocCursor(domain=domain, cursor=c, comments=self._comments) diff --git a/src/hawkmoth/parser.py b/src/hawkmoth/parser.py index 45a9fc43..f3e12162 100644 --- a/src/hawkmoth/parser.py +++ b/src/hawkmoth/parser.py @@ -45,6 +45,8 @@ from hawkmoth.doccursor import ( CursorKind, TokenKind, + DocCursor, + _get_meta, _cursor_get_tokens, _function_fixup, _get_macro_args, @@ -197,21 +199,13 @@ def _comment_extract(tu): return top_level_comments, comments -def _get_meta(comment, cursor=None): - meta = {'line': comment.extent.start.line} - if cursor: - meta['cursor.kind'] = cursor.kind - meta['cursor.displayname'] = cursor.displayname - meta['cursor.spelling'] = cursor.spelling - - return meta - -def _recursive_parse(domain, comments, errors, cursor, nest): - comment = comments[cursor.hash] +def _recursive_parse(errors, cursor, nest): + domain = cursor.domain + comment = cursor.comment name = cursor.spelling ttype = cursor.type.spelling text = comment.spelling - meta = _get_meta(comment, cursor) + meta = _get_meta(cursor) if cursor.kind == CursorKind.MACRO_DEFINITION: # FIXME: check args against comment @@ -281,21 +275,14 @@ def _recursive_parse(domain, comments, errors, cursor, nest): decl_name=decl_name, meta=meta) for c in cursor.get_children(): - if c.hash in comments: - ds.add_children(_recursive_parse(domain, comments, - errors, c, nest + 1)) + if c.comment: + ds.add_children(_recursive_parse(errors, c, nest + 1)) return [ds] elif cursor.kind == CursorKind.ENUM_CONSTANT_DECL: - # Show enumerator value if it's explicitly set in 
source - if '=' in [t.spelling for t in _cursor_get_tokens(cursor)]: - value = cursor.enum_value - else: - value = None - ds = docstring.EnumeratorDocstring(domain=domain, name=name, - value=value, text=text, + value=cursor.enum_value, text=text, meta=meta, nest=nest) return [ds] @@ -335,7 +322,7 @@ def _clang_diagnostics(diagnostics, errors): errors.append(ParserError(ErrorLevel(diag.severity), filename, diag.location.line, diag.spelling)) -def _parse_undocumented_block(domain, comments, errors, cursor, nest): +def _parse_undocumented_block(errors, cursor, nest): """Parse undocumented blocks. Some blocks define plenty of children that may be documented themselves @@ -345,8 +332,7 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest): ret = [] # Identify `extern "C"` and `extern "C++"` blocks and recursively parse - # their contents. Only `extern "C"` is of any relevance in choosing a - # different domain. + # their contents. # For some reason, the Python bindings don't return the cursor kind # LINKAGE_SPEC as one would expect, so we need to do it the hard way. 
if cursor.kind == CursorKind.UNEXPOSED_DECL: @@ -358,11 +344,7 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest): if not ntoken: return ret - if ntoken.spelling == '"C"': - domain = 'c' - elif ntoken.spelling == '"C++"': - domain = 'cpp' - else: + if ntoken.spelling not in ['"C"', '"C++"']: message = f'unhandled `extern {ntoken.spelling}` block will mask all children' errors.append(ParserError(ErrorLevel.WARNING, cursor.location.file.name, @@ -370,8 +352,8 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest): return ret for c in cursor.get_children(): - if c.hash in comments: - ret.extend(_recursive_parse(domain, comments, errors, c, nest)) + if c.comment: + ret.extend(_recursive_parse(errors, c, nest)) return ret @@ -426,16 +408,15 @@ def parse(filename, domain=None, clang_args=None): for comment in top_level_comments: text = comment.spelling - meta = _get_meta(comment) + meta = {'line': comment.extent.start.line} ds = docstring.TextDocstring(text=text, meta=meta) result.add_child(ds) - for cursor in tu.cursor.get_children(): - if cursor.hash in comments: - result.add_children(_recursive_parse(domain, comments, - errors, cursor, 0)) + for cc in tu.cursor.get_children(): + cursor = DocCursor(domain=domain, cursor=cc, comments=comments) + if cursor.comment: + result.add_children(_recursive_parse(errors, cursor, 0)) else: - result.add_children(_parse_undocumented_block(domain, comments, - errors, cursor, 0)) + result.add_children(_parse_undocumented_block(errors, cursor, 0)) return result, errors