From 9bf1c4db6fcff112d146045dfb4338d8e4ac039b Mon Sep 17 00:00:00 2001
From: Bruno Santos <brunomanuelsantos@tecnico.ulisboa.pt>
Date: Mon, 13 Nov 2023 19:04:44 +0000
Subject: [PATCH] doccursor: stub our shiny new cursor class

As a 1st step, we make a _very_ shallow wrapper around the Clang cursor.
There is of course no point to it yet, but with this simple trick we are
now using `DocCursor`s everywhere past `parse()`.

The only effect of this commit is to worsen performance very slightly
due to needless invocation of path specific code on all cursors. Running
the full test suite is now ~9% slower on my machine. We shall revisit
this later when it actually matters.
---
 src/hawkmoth/doccursor.py | 84 +++++++++++++++++++++++++++++++++++++++
 src/hawkmoth/parser.py    | 59 ++++++++++-----------------
 2 files changed, 104 insertions(+), 39 deletions(-)

diff --git a/src/hawkmoth/doccursor.py b/src/hawkmoth/doccursor.py
index 0de8d69e..088b3404 100644
--- a/src/hawkmoth/doccursor.py
+++ b/src/hawkmoth/doccursor.py
@@ -13,6 +13,14 @@
     SourceRange,
 )
 
+def _get_meta(cursor):
+    return {
+        'line': cursor.comment.extent.start.line if cursor.comment else '',
+        'cursor.kind': cursor.kind,
+        'cursor.displayname': cursor.displayname,
+        'cursor.spelling': cursor.spelling,
+    }
+
 # Workaround for clang cursor.get_tokens() being unreliable for cursors whose
 # extent contains macro expansions. The result may be empty or contain bogus
 # tokens, depending on the case.
@@ -453,3 +461,79 @@ def _get_inheritance(cursor):
             inherited.append(f'{pad(access_spec)}{child.type.spelling}')
 
     return ': ' + ', '.join(inherited) if len(inherited) > 0 else None
+
+
+class DocCursor:
+    """Documentation centric wrapper for Clang's own ``Cursor``.
+
+    This class abstracts a documentation-worthy cursor so the user can query
+    relevant bits for documentation purposes, while otherwise hiding all the
+    complications behind Clang's AST traversal and extraction of said bits of
+    information.
+
+    Technically, this class can hold any Clang cursor within itself, but it
+    won't expose any relevant information for those.
+    """
+
+    def __init__(self, domain=None, cursor=None, comments=None):
+        self._comments = comments if comments else {}
+        self._cc = cursor
+
+        self.domain = domain
+        self.hash = self._cc.hash
+        self.kind = self._cc.kind
+
+        if self.hash in self._comments:
+            self.comment = self._comments[self.hash]
+        else:
+            self.comment = None
+
+        # TODO:
+        # We mimic everything we need from Clang's cursor for a drop-in
+        # replacement. Later these will likely be more intelligent versions
+        # that incorporate logic from the helper parser functions.
+        self.access_specifier = self._cc.access_specifier
+        self.displayname = self._cc.displayname
+        if self.kind == CursorKind.ENUM_DECL:
+            self.enum_type = self._cc.enum_type
+        if self.kind == CursorKind.ENUM_CONSTANT_DECL:
+            if '=' in [t.spelling for t in _cursor_get_tokens(self._cc)]:
+                self.enum_value = self._cc.enum_value
+            else:
+                self.enum_value = None
+        self.exception_specification_kind = self._cc.exception_specification_kind
+        self.extent = self._cc.extent
+        self.is_anonymous = self._cc.is_anonymous
+        self.is_const_method = self._cc.is_const_method
+        self.is_default_method = self._cc.is_default_method
+        self.is_pure_virtual_method = self._cc.is_pure_virtual_method
+        self.is_scoped_enum = self._cc.is_scoped_enum
+        self.is_static_method = self._cc.is_static_method
+        self.is_virtual_method = self._cc.is_virtual_method
+        self.result_type = self._cc.result_type
+        self.semantic_parent = self._cc.semantic_parent
+        self.spelling = self._cc.spelling
+        self.storage_class = self._cc.storage_class
+        self.translation_unit = self._cc.translation_unit
+        self.type = self._cc.type
+
+    def __hash__(self):
+        return self.hash
+
+    def get_children(self):
+        """Get children cursors."""
+        domain = self.domain
+
+        # Identify `extern "C"` blocks and change domain accordingly. For some
+        # reason, the Python bindings don't return the cursor kind LINKAGE_SPEC
+        # as one would expect, so we need to do it the hard way.
+        if domain == 'cpp' and self.kind == CursorKind.UNEXPOSED_DECL:
+            tokens = _cursor_get_tokens(self)
+            ntoken = next(tokens, None)
+            if ntoken and ntoken.spelling == 'extern':
+                ntoken = next(tokens, None)
+                if ntoken and ntoken.spelling == '"C"':
+                    domain = 'c'
+
+        for c in self._cc.get_children():
+            yield DocCursor(domain=domain, cursor=c, comments=self._comments)
diff --git a/src/hawkmoth/parser.py b/src/hawkmoth/parser.py
index 45a9fc43..f3e12162 100644
--- a/src/hawkmoth/parser.py
+++ b/src/hawkmoth/parser.py
@@ -45,6 +45,8 @@
 from hawkmoth.doccursor import (
     CursorKind,
     TokenKind,
+    DocCursor,
+    _get_meta,
     _cursor_get_tokens,
     _function_fixup,
     _get_macro_args,
@@ -197,21 +199,13 @@ def _comment_extract(tu):
 
     return top_level_comments, comments
 
-def _get_meta(comment, cursor=None):
-    meta = {'line': comment.extent.start.line}
-    if cursor:
-        meta['cursor.kind'] = cursor.kind
-        meta['cursor.displayname'] = cursor.displayname
-        meta['cursor.spelling'] = cursor.spelling
-
-    return meta
-
-def _recursive_parse(domain, comments, errors, cursor, nest):
-    comment = comments[cursor.hash]
+def _recursive_parse(errors, cursor, nest):
+    domain = cursor.domain
+    comment = cursor.comment
     name = cursor.spelling
     ttype = cursor.type.spelling
     text = comment.spelling
-    meta = _get_meta(comment, cursor)
+    meta = _get_meta(cursor)
 
     if cursor.kind == CursorKind.MACRO_DEFINITION:
         # FIXME: check args against comment
@@ -281,21 +275,14 @@ def _recursive_parse(domain, comments, errors, cursor, nest):
                                           decl_name=decl_name, meta=meta)
 
         for c in cursor.get_children():
-            if c.hash in comments:
-                ds.add_children(_recursive_parse(domain, comments,
-                                                 errors, c, nest + 1))
+            if c.comment:
+                ds.add_children(_recursive_parse(errors, c, nest + 1))
 
         return [ds]
 
     elif cursor.kind == CursorKind.ENUM_CONSTANT_DECL:
-        # Show enumerator value if it's explicitly set in source
-        if '=' in [t.spelling for t in _cursor_get_tokens(cursor)]:
-            value = cursor.enum_value
-        else:
-            value = None
-
         ds = docstring.EnumeratorDocstring(domain=domain, name=name,
-                                           value=value, text=text,
+                                           value=cursor.enum_value, text=text,
                                            meta=meta, nest=nest)
 
         return [ds]
@@ -335,7 +322,7 @@ def _clang_diagnostics(diagnostics, errors):
         errors.append(ParserError(ErrorLevel(diag.severity), filename,
                                   diag.location.line, diag.spelling))
 
-def _parse_undocumented_block(domain, comments, errors, cursor, nest):
+def _parse_undocumented_block(errors, cursor, nest):
     """Parse undocumented blocks.
 
     Some blocks define plenty of children that may be documented themselves
@@ -345,8 +332,7 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest):
     ret = []
 
     # Identify `extern "C"` and `extern "C++"` blocks and recursively parse
-    # their contents. Only `extern "C"` is of any relevance in choosing a
-    # different domain.
+    # their contents.
     # For some reason, the Python bindings don't return the cursor kind
     # LINKAGE_SPEC as one would expect, so we need to do it the hard way.
     if cursor.kind == CursorKind.UNEXPOSED_DECL:
@@ -358,11 +344,7 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest):
             if not ntoken:
                 return ret
 
-            if ntoken.spelling == '"C"':
-                domain = 'c'
-            elif ntoken.spelling == '"C++"':
-                domain = 'cpp'
-            else:
+            if ntoken.spelling not in ['"C"', '"C++"']:
                 message = f'unhandled `extern {ntoken.spelling}` block will mask all children'
                 errors.append(ParserError(ErrorLevel.WARNING,
                                           cursor.location.file.name,
@@ -370,8 +352,8 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest):
                 return ret
 
             for c in cursor.get_children():
-                if c.hash in comments:
-                    ret.extend(_recursive_parse(domain, comments, errors, c, nest))
+                if c.comment:
+                    ret.extend(_recursive_parse(errors, c, nest))
 
     return ret
 
@@ -426,16 +408,15 @@ def parse(filename, domain=None, clang_args=None):
 
     for comment in top_level_comments:
         text = comment.spelling
-        meta = _get_meta(comment)
+        meta = {'line': comment.extent.start.line}
         ds = docstring.TextDocstring(text=text, meta=meta)
         result.add_child(ds)
 
-    for cursor in tu.cursor.get_children():
-        if cursor.hash in comments:
-            result.add_children(_recursive_parse(domain, comments,
-                                                 errors, cursor, 0))
+    for cc in tu.cursor.get_children():
+        cursor = DocCursor(domain=domain, cursor=cc, comments=comments)
+        if cursor.comment:
+            result.add_children(_recursive_parse(errors, cursor, 0))
         else:
-            result.add_children(_parse_undocumented_block(domain, comments,
-                                                          errors, cursor, 0))
+            result.add_children(_parse_undocumented_block(errors, cursor, 0))
 
     return result, errors